(rm(list = ls()))
## NULL
library(car)
## Loading required package: carData
library(formattable)
data(Salaries)
?(Salaries)
formattable(Salaries)
rank discipline yrs.since.phd yrs.service sex salary
Prof B 19 18 Male 139750
Prof B 20 16 Male 173200
AsstProf B 4 3 Male 79750
Prof B 45 39 Male 115000
Prof B 40 41 Male 141500
AssocProf B 6 6 Male 97000
Prof B 30 23 Male 175000
Prof B 45 45 Male 147765
Prof B 21 20 Male 119250
Prof B 18 18 Female 129000
AssocProf B 12 8 Male 119800
AsstProf B 7 2 Male 79800
AsstProf B 1 1 Male 77700
AsstProf B 2 0 Male 78000
Prof B 20 18 Male 104800
Prof B 12 3 Male 117150
Prof B 19 20 Male 101000
Prof A 38 34 Male 103450
Prof A 37 23 Male 124750
Prof A 39 36 Female 137000
Prof A 31 26 Male 89565
Prof A 36 31 Male 102580
Prof A 34 30 Male 93904
Prof A 24 19 Male 113068
AssocProf A 13 8 Female 74830
Prof A 21 8 Male 106294
Prof A 35 23 Male 134885
AsstProf B 5 3 Male 82379
AsstProf B 11 0 Male 77000
Prof B 12 8 Male 118223
Prof B 20 4 Male 132261
AsstProf B 7 2 Male 79916
Prof B 13 9 Male 117256
AsstProf B 4 2 Male 80225
AsstProf B 4 2 Female 80225
AsstProf B 5 0 Female 77000
Prof B 22 21 Male 155750
AsstProf B 7 4 Male 86373
Prof B 41 31 Male 125196
AssocProf B 9 9 Male 100938
Prof B 23 2 Male 146500
AssocProf B 23 23 Male 93418
Prof B 40 27 Male 101299
Prof B 38 38 Male 231545
Prof B 19 19 Male 94384
Prof B 25 15 Male 114778
Prof B 40 28 Male 98193
Prof B 23 19 Female 151768
Prof B 25 25 Female 140096
AsstProf B 1 1 Male 70768
Prof B 28 28 Male 126621
Prof B 12 11 Male 108875
AsstProf B 11 3 Female 74692
Prof B 16 9 Male 106639
AssocProf B 12 11 Male 103760
AssocProf B 14 5 Male 83900
Prof B 23 21 Male 117704
AssocProf B 9 8 Male 90215
AssocProf B 10 9 Male 100135
AsstProf B 8 3 Male 75044
AssocProf B 9 8 Male 90304
AsstProf B 3 2 Male 75243
Prof B 33 31 Male 109785
AssocProf B 11 11 Female 103613
AsstProf B 4 3 Male 68404
AssocProf B 9 8 Male 100522
Prof B 22 12 Male 101000
Prof B 35 31 Male 99418
Prof B 17 17 Female 111512
Prof B 28 36 Male 91412
Prof B 17 2 Male 126320
Prof B 45 45 Male 146856
Prof B 29 19 Male 100131
Prof B 35 34 Male 92391
Prof B 28 23 Male 113398
AsstProf B 8 3 Male 73266
Prof B 17 3 Male 150480
Prof B 26 19 Male 193000
AsstProf B 3 1 Male 86100
AsstProf B 6 2 Male 84240
Prof B 43 28 Male 150743
Prof B 17 16 Male 135585
Prof B 22 20 Male 144640
AsstProf B 6 2 Male 88825
Prof B 17 18 Female 122960
Prof B 15 14 Male 132825
Prof B 37 37 Male 152708
AsstProf B 2 2 Male 88400
Prof B 25 25 Male 172272
AssocProf B 9 7 Male 107008
AsstProf B 10 5 Female 97032
AssocProf B 10 7 Male 105128
AssocProf B 10 7 Male 105631
Prof B 38 38 Male 166024
Prof B 21 20 Male 123683
AsstProf B 4 0 Male 84000
AssocProf B 17 12 Male 95611
Prof B 13 7 Male 129676
Prof B 30 14 Male 102235
Prof B 41 26 Male 106689
Prof B 42 25 Male 133217
Prof B 28 23 Male 126933
Prof B 16 5 Male 153303
Prof B 20 14 Female 127512
AssocProf A 18 10 Male 83850
Prof A 31 28 Male 113543
AssocProf A 11 8 Male 82099
AssocProf A 10 8 Male 82600
AssocProf A 15 8 Male 81500
Prof A 40 31 Male 131205
Prof A 20 16 Male 112429
AssocProf A 19 16 Male 82100
AsstProf A 3 1 Male 72500
Prof A 37 37 Male 104279
Prof A 12 0 Female 105000
Prof A 21 9 Male 120806
Prof A 30 29 Male 148500
Prof A 39 36 Male 117515
AsstProf A 4 1 Male 72500
AsstProf A 5 3 Female 73500
Prof A 14 14 Male 115313
Prof A 32 32 Male 124309
Prof A 24 22 Male 97262
AssocProf A 25 22 Female 62884
Prof A 24 22 Male 96614
Prof A 54 49 Male 78162
Prof A 28 26 Male 155500
AsstProf A 2 0 Female 72500
Prof A 32 30 Male 113278
AsstProf A 4 2 Male 73000
AssocProf A 11 9 Male 83001
Prof A 56 57 Male 76840
AssocProf A 10 8 Female 77500
AsstProf A 3 1 Female 72500
Prof A 35 25 Male 168635
Prof A 20 18 Male 136000
Prof A 16 14 Male 108262
Prof A 17 14 Male 105668
AssocProf A 10 7 Male 73877
Prof A 21 18 Male 152664
AssocProf A 14 8 Male 100102
AssocProf A 15 10 Male 81500
Prof A 19 11 Male 106608
AsstProf B 3 3 Male 89942
Prof B 27 27 Male 112696
Prof B 28 28 Male 119015
AsstProf B 4 4 Male 92000
Prof B 27 27 Male 156938
Prof B 36 26 Female 144651
AsstProf B 4 3 Male 95079
Prof B 14 12 Male 128148
AsstProf B 4 4 Male 92000
Prof B 21 9 Male 111168
AssocProf B 12 10 Female 103994
AsstProf B 4 0 Male 92000
Prof B 21 21 Male 118971
AssocProf B 12 18 Male 113341
AsstProf B 1 0 Male 88000
AssocProf B 6 6 Male 95408
Prof B 15 16 Male 137167
AsstProf B 2 2 Male 89516
Prof B 26 19 Male 176500
AssocProf B 22 7 Male 98510
AsstProf B 3 3 Male 89942
AsstProf B 1 0 Male 88795
Prof B 21 8 Male 105890
Prof B 16 16 Male 167284
Prof B 18 19 Male 130664
AssocProf B 8 6 Male 101210
Prof B 25 18 Male 181257
AsstProf B 5 5 Male 91227
Prof B 19 19 Male 151575
Prof B 37 24 Male 93164
Prof B 20 20 Male 134185
AssocProf B 17 6 Male 105000
Prof B 28 25 Male 111751
AssocProf B 10 7 Male 95436
AssocProf B 13 9 Male 100944
Prof B 27 14 Male 147349
AsstProf B 3 3 Female 92000
Prof B 11 11 Male 142467
Prof B 18 5 Male 141136
AssocProf B 8 8 Male 100000
Prof B 26 22 Male 150000
Prof B 23 23 Male 101000
Prof B 33 30 Male 134000
AssocProf B 13 10 Female 103750
Prof B 18 10 Male 107500
AssocProf B 28 28 Male 106300
Prof B 25 19 Male 153750
Prof B 22 9 Male 180000
Prof B 43 22 Male 133700
Prof B 19 18 Male 122100
AssocProf B 19 19 Male 86250
AssocProf B 48 53 Male 90000
AssocProf B 9 7 Male 113600
AsstProf B 4 4 Male 92700
AsstProf B 4 4 Male 92000
Prof B 34 33 Male 189409
Prof B 38 22 Male 114500
AsstProf B 4 4 Male 92700
Prof B 40 40 Male 119700
Prof B 28 17 Male 160400
Prof B 17 17 Male 152500
Prof B 19 5 Male 165000
Prof B 21 2 Male 96545
Prof B 35 33 Male 162200
Prof B 18 18 Male 120000
AsstProf B 7 2 Male 91300
Prof B 20 20 Male 163200
AsstProf B 4 3 Male 91000
Prof B 39 39 Male 111350
Prof B 15 7 Male 128400
Prof B 26 19 Male 126200
AssocProf B 11 1 Male 118700
Prof B 16 11 Male 145350
Prof B 15 11 Male 146000
AssocProf B 29 22 Male 105350
AssocProf B 14 7 Female 109650
Prof B 13 11 Male 119500
Prof B 21 21 Male 170000
Prof B 23 10 Male 145200
AssocProf B 13 6 Male 107150
Prof B 34 20 Male 129600
Prof A 38 35 Male 87800
Prof A 20 20 Male 122400
AsstProf A 3 1 Male 63900
AssocProf A 9 7 Male 70000
Prof A 16 11 Male 88175
Prof A 39 38 Male 133900
Prof A 29 27 Female 91000
AssocProf A 26 24 Female 73300
Prof A 38 19 Male 148750
Prof A 36 19 Female 117555
AsstProf A 8 3 Male 69700
Prof A 28 17 Male 81700
Prof A 25 25 Male 114000
AsstProf A 7 6 Female 63100
Prof A 46 40 Male 77202
Prof A 19 6 Male 96200
AsstProf A 5 3 Male 69200
Prof A 31 30 Male 122875
Prof A 38 37 Male 102600
Prof A 23 23 Male 108200
Prof A 19 23 Male 84273
Prof A 17 11 Female 90450
Prof A 30 23 Male 91100
Prof A 21 18 Male 101100
Prof A 28 23 Male 128800
Prof A 29 7 Male 204000
Prof A 39 39 Male 109000
Prof A 20 8 Male 102000
Prof A 31 12 Male 132000
AsstProf A 4 2 Female 77500
Prof A 28 7 Female 116450
AssocProf A 12 8 Male 83000
Prof A 22 22 Male 140300
AssocProf A 30 23 Male 74000
AsstProf A 9 3 Male 73800
Prof A 32 30 Male 92550
AssocProf A 41 33 Male 88600
Prof A 45 45 Male 107550
Prof A 31 26 Male 121200
Prof A 31 31 Male 126000
Prof A 37 35 Male 99000
Prof A 36 30 Male 134800
Prof A 43 43 Male 143940
Prof A 14 10 Male 104350
Prof A 47 44 Male 89650
Prof A 13 7 Male 103700
Prof A 42 40 Male 143250
Prof A 42 18 Male 194800
AsstProf A 4 1 Male 73000
AsstProf A 8 4 Male 74000
AsstProf A 8 3 Female 78500
Prof A 12 6 Male 93000
Prof A 52 48 Male 107200
Prof A 31 27 Male 163200
Prof A 24 18 Male 107100
Prof A 46 46 Male 100600
Prof A 39 38 Male 136500
Prof A 37 27 Male 103600
Prof A 51 51 Male 57800
Prof A 45 43 Male 155865
AssocProf A 8 6 Male 88650
AssocProf A 49 49 Male 81800
Prof A 28 27 Male 115800
AsstProf A 2 0 Male 85000
Prof A 29 27 Male 150500
AsstProf A 8 5 Male 74000
Prof A 33 7 Male 174500
Prof A 32 28 Male 168500
Prof A 39 9 Male 183800
AssocProf A 11 1 Male 104800
Prof A 19 7 Male 107300
Prof A 40 36 Male 97150
Prof A 18 18 Male 126300
Prof A 17 11 Male 148800
Prof A 49 43 Male 72300
AssocProf A 45 39 Male 70700
Prof A 39 36 Male 88600
Prof A 27 16 Male 127100
Prof A 28 13 Male 170500
Prof A 14 4 Male 105260
Prof A 46 44 Male 144050
Prof A 33 31 Male 111350
AsstProf A 7 4 Male 74500
Prof A 31 28 Male 122500
AsstProf A 5 0 Male 74000
Prof A 22 15 Male 166800
Prof A 20 7 Male 92050
Prof A 14 9 Male 108100
Prof A 29 19 Male 94350
Prof A 35 35 Male 100351
Prof A 22 6 Male 146800
AsstProf B 6 3 Male 84716
AssocProf B 12 9 Female 71065
Prof B 46 45 Male 67559
Prof B 16 16 Male 134550
Prof B 16 15 Male 135027
Prof B 24 23 Male 104428
AssocProf B 9 9 Male 95642
AssocProf B 13 11 Male 126431
Prof B 24 15 Female 161101
Prof B 30 31 Male 162221
AsstProf B 8 4 Male 84500
Prof B 23 15 Male 124714
Prof B 37 37 Male 151650
AssocProf B 10 10 Male 99247
Prof B 23 23 Male 134778
Prof B 49 60 Male 192253
Prof B 20 9 Male 116518
Prof B 18 10 Female 105450
Prof B 33 19 Male 145098
AssocProf B 19 6 Female 104542
Prof B 36 38 Male 151445
Prof B 35 23 Male 98053
Prof B 13 12 Male 145000
Prof B 32 25 Male 128464
Prof B 37 15 Male 137317
Prof B 13 11 Male 106231
Prof B 17 17 Female 124312
Prof B 38 38 Male 114596
Prof B 31 31 Male 162150
Prof B 32 35 Male 150376
Prof B 15 10 Male 107986
Prof B 41 27 Male 142023
Prof B 39 33 Male 128250
AsstProf B 4 3 Male 80139
Prof B 27 28 Male 144309
Prof B 56 49 Male 186960
Prof B 38 38 Male 93519
Prof B 26 27 Male 142500
Prof B 22 20 Male 138000
AsstProf B 8 1 Male 83600
Prof B 25 21 Male 145028
Prof A 49 40 Male 88709
Prof A 39 35 Male 107309
Prof A 28 14 Female 109954
AsstProf A 11 4 Male 78785
Prof A 14 11 Male 121946
Prof A 23 15 Female 109646
Prof A 30 30 Male 138771
AssocProf A 20 17 Male 81285
Prof A 43 43 Male 205500
Prof A 43 40 Male 101036
Prof A 15 10 Male 115435
AssocProf A 10 1 Male 108413
Prof A 35 30 Male 131950
Prof A 33 31 Male 134690
AssocProf A 13 8 Male 78182
Prof A 23 20 Male 110515
Prof A 12 7 Male 109707
Prof A 30 26 Male 136660
Prof A 27 19 Male 103275
Prof A 28 26 Male 103649
AsstProf A 4 1 Male 74856
AsstProf A 6 3 Male 77081
Prof A 38 38 Male 150680
AssocProf A 11 8 Male 104121
AsstProf A 8 3 Male 75996
Prof A 27 23 Male 172505
AssocProf A 8 5 Male 86895
Prof A 44 44 Male 105000
Prof A 27 21 Male 125192
Prof A 15 9 Male 114330
Prof A 29 27 Male 139219
Prof A 29 15 Male 109305
Prof A 38 36 Male 119450
Prof A 33 18 Male 186023
Prof A 40 19 Male 166605
Prof A 30 19 Male 151292
Prof A 33 30 Male 103106
Prof A 31 19 Male 150564
Prof A 42 25 Male 101738
Prof A 25 15 Male 95329
AsstProf A 8 4 Male 81035
  1. Data Preprocessing Apply the labels “Theoretical” and “Applied” to discipline.
Salaries.o<-(Salaries)
Salaries<-na.omit(Salaries)
levels(Salaries$discipline) <- c("Theorectical", "Applied")
formattable(Salaries)
rank discipline yrs.since.phd yrs.service sex salary
Prof Applied 19 18 Male 139750
Prof Applied 20 16 Male 173200
AsstProf Applied 4 3 Male 79750
Prof Applied 45 39 Male 115000
Prof Applied 40 41 Male 141500
AssocProf Applied 6 6 Male 97000
Prof Applied 30 23 Male 175000
Prof Applied 45 45 Male 147765
Prof Applied 21 20 Male 119250
Prof Applied 18 18 Female 129000
AssocProf Applied 12 8 Male 119800
AsstProf Applied 7 2 Male 79800
AsstProf Applied 1 1 Male 77700
AsstProf Applied 2 0 Male 78000
Prof Applied 20 18 Male 104800
Prof Applied 12 3 Male 117150
Prof Applied 19 20 Male 101000
Prof Theorectical 38 34 Male 103450
Prof Theorectical 37 23 Male 124750
Prof Theorectical 39 36 Female 137000
Prof Theorectical 31 26 Male 89565
Prof Theorectical 36 31 Male 102580
Prof Theorectical 34 30 Male 93904
Prof Theorectical 24 19 Male 113068
AssocProf Theorectical 13 8 Female 74830
Prof Theorectical 21 8 Male 106294
Prof Theorectical 35 23 Male 134885
AsstProf Applied 5 3 Male 82379
AsstProf Applied 11 0 Male 77000
Prof Applied 12 8 Male 118223
Prof Applied 20 4 Male 132261
AsstProf Applied 7 2 Male 79916
Prof Applied 13 9 Male 117256
AsstProf Applied 4 2 Male 80225
AsstProf Applied 4 2 Female 80225
AsstProf Applied 5 0 Female 77000
Prof Applied 22 21 Male 155750
AsstProf Applied 7 4 Male 86373
Prof Applied 41 31 Male 125196
AssocProf Applied 9 9 Male 100938
Prof Applied 23 2 Male 146500
AssocProf Applied 23 23 Male 93418
Prof Applied 40 27 Male 101299
Prof Applied 38 38 Male 231545
Prof Applied 19 19 Male 94384
Prof Applied 25 15 Male 114778
Prof Applied 40 28 Male 98193
Prof Applied 23 19 Female 151768
Prof Applied 25 25 Female 140096
AsstProf Applied 1 1 Male 70768
Prof Applied 28 28 Male 126621
Prof Applied 12 11 Male 108875
AsstProf Applied 11 3 Female 74692
Prof Applied 16 9 Male 106639
AssocProf Applied 12 11 Male 103760
AssocProf Applied 14 5 Male 83900
Prof Applied 23 21 Male 117704
AssocProf Applied 9 8 Male 90215
AssocProf Applied 10 9 Male 100135
AsstProf Applied 8 3 Male 75044
AssocProf Applied 9 8 Male 90304
AsstProf Applied 3 2 Male 75243
Prof Applied 33 31 Male 109785
AssocProf Applied 11 11 Female 103613
AsstProf Applied 4 3 Male 68404
AssocProf Applied 9 8 Male 100522
Prof Applied 22 12 Male 101000
Prof Applied 35 31 Male 99418
Prof Applied 17 17 Female 111512
Prof Applied 28 36 Male 91412
Prof Applied 17 2 Male 126320
Prof Applied 45 45 Male 146856
Prof Applied 29 19 Male 100131
Prof Applied 35 34 Male 92391
Prof Applied 28 23 Male 113398
AsstProf Applied 8 3 Male 73266
Prof Applied 17 3 Male 150480
Prof Applied 26 19 Male 193000
AsstProf Applied 3 1 Male 86100
AsstProf Applied 6 2 Male 84240
Prof Applied 43 28 Male 150743
Prof Applied 17 16 Male 135585
Prof Applied 22 20 Male 144640
AsstProf Applied 6 2 Male 88825
Prof Applied 17 18 Female 122960
Prof Applied 15 14 Male 132825
Prof Applied 37 37 Male 152708
AsstProf Applied 2 2 Male 88400
Prof Applied 25 25 Male 172272
AssocProf Applied 9 7 Male 107008
AsstProf Applied 10 5 Female 97032
AssocProf Applied 10 7 Male 105128
AssocProf Applied 10 7 Male 105631
Prof Applied 38 38 Male 166024
Prof Applied 21 20 Male 123683
AsstProf Applied 4 0 Male 84000
AssocProf Applied 17 12 Male 95611
Prof Applied 13 7 Male 129676
Prof Applied 30 14 Male 102235
Prof Applied 41 26 Male 106689
Prof Applied 42 25 Male 133217
Prof Applied 28 23 Male 126933
Prof Applied 16 5 Male 153303
Prof Applied 20 14 Female 127512
AssocProf Theorectical 18 10 Male 83850
Prof Theorectical 31 28 Male 113543
AssocProf Theorectical 11 8 Male 82099
AssocProf Theorectical 10 8 Male 82600
AssocProf Theorectical 15 8 Male 81500
Prof Theorectical 40 31 Male 131205
Prof Theorectical 20 16 Male 112429
AssocProf Theorectical 19 16 Male 82100
AsstProf Theorectical 3 1 Male 72500
Prof Theorectical 37 37 Male 104279
Prof Theorectical 12 0 Female 105000
Prof Theorectical 21 9 Male 120806
Prof Theorectical 30 29 Male 148500
Prof Theorectical 39 36 Male 117515
AsstProf Theorectical 4 1 Male 72500
AsstProf Theorectical 5 3 Female 73500
Prof Theorectical 14 14 Male 115313
Prof Theorectical 32 32 Male 124309
Prof Theorectical 24 22 Male 97262
AssocProf Theorectical 25 22 Female 62884
Prof Theorectical 24 22 Male 96614
Prof Theorectical 54 49 Male 78162
Prof Theorectical 28 26 Male 155500
AsstProf Theorectical 2 0 Female 72500
Prof Theorectical 32 30 Male 113278
AsstProf Theorectical 4 2 Male 73000
AssocProf Theorectical 11 9 Male 83001
Prof Theorectical 56 57 Male 76840
AssocProf Theorectical 10 8 Female 77500
AsstProf Theorectical 3 1 Female 72500
Prof Theorectical 35 25 Male 168635
Prof Theorectical 20 18 Male 136000
Prof Theorectical 16 14 Male 108262
Prof Theorectical 17 14 Male 105668
AssocProf Theorectical 10 7 Male 73877
Prof Theorectical 21 18 Male 152664
AssocProf Theorectical 14 8 Male 100102
AssocProf Theorectical 15 10 Male 81500
Prof Theorectical 19 11 Male 106608
AsstProf Applied 3 3 Male 89942
Prof Applied 27 27 Male 112696
Prof Applied 28 28 Male 119015
AsstProf Applied 4 4 Male 92000
Prof Applied 27 27 Male 156938
Prof Applied 36 26 Female 144651
AsstProf Applied 4 3 Male 95079
Prof Applied 14 12 Male 128148
AsstProf Applied 4 4 Male 92000
Prof Applied 21 9 Male 111168
AssocProf Applied 12 10 Female 103994
AsstProf Applied 4 0 Male 92000
Prof Applied 21 21 Male 118971
AssocProf Applied 12 18 Male 113341
AsstProf Applied 1 0 Male 88000
AssocProf Applied 6 6 Male 95408
Prof Applied 15 16 Male 137167
AsstProf Applied 2 2 Male 89516
Prof Applied 26 19 Male 176500
AssocProf Applied 22 7 Male 98510
AsstProf Applied 3 3 Male 89942
AsstProf Applied 1 0 Male 88795
Prof Applied 21 8 Male 105890
Prof Applied 16 16 Male 167284
Prof Applied 18 19 Male 130664
AssocProf Applied 8 6 Male 101210
Prof Applied 25 18 Male 181257
AsstProf Applied 5 5 Male 91227
Prof Applied 19 19 Male 151575
Prof Applied 37 24 Male 93164
Prof Applied 20 20 Male 134185
AssocProf Applied 17 6 Male 105000
Prof Applied 28 25 Male 111751
AssocProf Applied 10 7 Male 95436
AssocProf Applied 13 9 Male 100944
Prof Applied 27 14 Male 147349
AsstProf Applied 3 3 Female 92000
Prof Applied 11 11 Male 142467
Prof Applied 18 5 Male 141136
AssocProf Applied 8 8 Male 100000
Prof Applied 26 22 Male 150000
Prof Applied 23 23 Male 101000
Prof Applied 33 30 Male 134000
AssocProf Applied 13 10 Female 103750
Prof Applied 18 10 Male 107500
AssocProf Applied 28 28 Male 106300
Prof Applied 25 19 Male 153750
Prof Applied 22 9 Male 180000
Prof Applied 43 22 Male 133700
Prof Applied 19 18 Male 122100
AssocProf Applied 19 19 Male 86250
AssocProf Applied 48 53 Male 90000
AssocProf Applied 9 7 Male 113600
AsstProf Applied 4 4 Male 92700
AsstProf Applied 4 4 Male 92000
Prof Applied 34 33 Male 189409
Prof Applied 38 22 Male 114500
AsstProf Applied 4 4 Male 92700
Prof Applied 40 40 Male 119700
Prof Applied 28 17 Male 160400
Prof Applied 17 17 Male 152500
Prof Applied 19 5 Male 165000
Prof Applied 21 2 Male 96545
Prof Applied 35 33 Male 162200
Prof Applied 18 18 Male 120000
AsstProf Applied 7 2 Male 91300
Prof Applied 20 20 Male 163200
AsstProf Applied 4 3 Male 91000
Prof Applied 39 39 Male 111350
Prof Applied 15 7 Male 128400
Prof Applied 26 19 Male 126200
AssocProf Applied 11 1 Male 118700
Prof Applied 16 11 Male 145350
Prof Applied 15 11 Male 146000
AssocProf Applied 29 22 Male 105350
AssocProf Applied 14 7 Female 109650
Prof Applied 13 11 Male 119500
Prof Applied 21 21 Male 170000
Prof Applied 23 10 Male 145200
AssocProf Applied 13 6 Male 107150
Prof Applied 34 20 Male 129600
Prof Theorectical 38 35 Male 87800
Prof Theorectical 20 20 Male 122400
AsstProf Theorectical 3 1 Male 63900
AssocProf Theorectical 9 7 Male 70000
Prof Theorectical 16 11 Male 88175
Prof Theorectical 39 38 Male 133900
Prof Theorectical 29 27 Female 91000
AssocProf Theorectical 26 24 Female 73300
Prof Theorectical 38 19 Male 148750
Prof Theorectical 36 19 Female 117555
AsstProf Theorectical 8 3 Male 69700
Prof Theorectical 28 17 Male 81700
Prof Theorectical 25 25 Male 114000
AsstProf Theorectical 7 6 Female 63100
Prof Theorectical 46 40 Male 77202
Prof Theorectical 19 6 Male 96200
AsstProf Theorectical 5 3 Male 69200
Prof Theorectical 31 30 Male 122875
Prof Theorectical 38 37 Male 102600
Prof Theorectical 23 23 Male 108200
Prof Theorectical 19 23 Male 84273
Prof Theorectical 17 11 Female 90450
Prof Theorectical 30 23 Male 91100
Prof Theorectical 21 18 Male 101100
Prof Theorectical 28 23 Male 128800
Prof Theorectical 29 7 Male 204000
Prof Theorectical 39 39 Male 109000
Prof Theorectical 20 8 Male 102000
Prof Theorectical 31 12 Male 132000
AsstProf Theorectical 4 2 Female 77500
Prof Theorectical 28 7 Female 116450
AssocProf Theorectical 12 8 Male 83000
Prof Theorectical 22 22 Male 140300
AssocProf Theorectical 30 23 Male 74000
AsstProf Theorectical 9 3 Male 73800
Prof Theorectical 32 30 Male 92550
AssocProf Theorectical 41 33 Male 88600
Prof Theorectical 45 45 Male 107550
Prof Theorectical 31 26 Male 121200
Prof Theorectical 31 31 Male 126000
Prof Theorectical 37 35 Male 99000
Prof Theorectical 36 30 Male 134800
Prof Theorectical 43 43 Male 143940
Prof Theorectical 14 10 Male 104350
Prof Theorectical 47 44 Male 89650
Prof Theorectical 13 7 Male 103700
Prof Theorectical 42 40 Male 143250
Prof Theorectical 42 18 Male 194800
AsstProf Theorectical 4 1 Male 73000
AsstProf Theorectical 8 4 Male 74000
AsstProf Theorectical 8 3 Female 78500
Prof Theorectical 12 6 Male 93000
Prof Theorectical 52 48 Male 107200
Prof Theorectical 31 27 Male 163200
Prof Theorectical 24 18 Male 107100
Prof Theorectical 46 46 Male 100600
Prof Theorectical 39 38 Male 136500
Prof Theorectical 37 27 Male 103600
Prof Theorectical 51 51 Male 57800
Prof Theorectical 45 43 Male 155865
AssocProf Theorectical 8 6 Male 88650
AssocProf Theorectical 49 49 Male 81800
Prof Theorectical 28 27 Male 115800
AsstProf Theorectical 2 0 Male 85000
Prof Theorectical 29 27 Male 150500
AsstProf Theorectical 8 5 Male 74000
Prof Theorectical 33 7 Male 174500
Prof Theorectical 32 28 Male 168500
Prof Theorectical 39 9 Male 183800
AssocProf Theorectical 11 1 Male 104800
Prof Theorectical 19 7 Male 107300
Prof Theorectical 40 36 Male 97150
Prof Theorectical 18 18 Male 126300
Prof Theorectical 17 11 Male 148800
Prof Theorectical 49 43 Male 72300
AssocProf Theorectical 45 39 Male 70700
Prof Theorectical 39 36 Male 88600
Prof Theorectical 27 16 Male 127100
Prof Theorectical 28 13 Male 170500
Prof Theorectical 14 4 Male 105260
Prof Theorectical 46 44 Male 144050
Prof Theorectical 33 31 Male 111350
AsstProf Theorectical 7 4 Male 74500
Prof Theorectical 31 28 Male 122500
AsstProf Theorectical 5 0 Male 74000
Prof Theorectical 22 15 Male 166800
Prof Theorectical 20 7 Male 92050
Prof Theorectical 14 9 Male 108100
Prof Theorectical 29 19 Male 94350
Prof Theorectical 35 35 Male 100351
Prof Theorectical 22 6 Male 146800
AsstProf Applied 6 3 Male 84716
AssocProf Applied 12 9 Female 71065
Prof Applied 46 45 Male 67559
Prof Applied 16 16 Male 134550
Prof Applied 16 15 Male 135027
Prof Applied 24 23 Male 104428
AssocProf Applied 9 9 Male 95642
AssocProf Applied 13 11 Male 126431
Prof Applied 24 15 Female 161101
Prof Applied 30 31 Male 162221
AsstProf Applied 8 4 Male 84500
Prof Applied 23 15 Male 124714
Prof Applied 37 37 Male 151650
AssocProf Applied 10 10 Male 99247
Prof Applied 23 23 Male 134778
Prof Applied 49 60 Male 192253
Prof Applied 20 9 Male 116518
Prof Applied 18 10 Female 105450
Prof Applied 33 19 Male 145098
AssocProf Applied 19 6 Female 104542
Prof Applied 36 38 Male 151445
Prof Applied 35 23 Male 98053
Prof Applied 13 12 Male 145000
Prof Applied 32 25 Male 128464
Prof Applied 37 15 Male 137317
Prof Applied 13 11 Male 106231
Prof Applied 17 17 Female 124312
Prof Applied 38 38 Male 114596
Prof Applied 31 31 Male 162150
Prof Applied 32 35 Male 150376
Prof Applied 15 10 Male 107986
Prof Applied 41 27 Male 142023
Prof Applied 39 33 Male 128250
AsstProf Applied 4 3 Male 80139
Prof Applied 27 28 Male 144309
Prof Applied 56 49 Male 186960
Prof Applied 38 38 Male 93519
Prof Applied 26 27 Male 142500
Prof Applied 22 20 Male 138000
AsstProf Applied 8 1 Male 83600
Prof Applied 25 21 Male 145028
Prof Theorectical 49 40 Male 88709
Prof Theorectical 39 35 Male 107309
Prof Theorectical 28 14 Female 109954
AsstProf Theorectical 11 4 Male 78785
Prof Theorectical 14 11 Male 121946
Prof Theorectical 23 15 Female 109646
Prof Theorectical 30 30 Male 138771
AssocProf Theorectical 20 17 Male 81285
Prof Theorectical 43 43 Male 205500
Prof Theorectical 43 40 Male 101036
Prof Theorectical 15 10 Male 115435
AssocProf Theorectical 10 1 Male 108413
Prof Theorectical 35 30 Male 131950
Prof Theorectical 33 31 Male 134690
AssocProf Theorectical 13 8 Male 78182
Prof Theorectical 23 20 Male 110515
Prof Theorectical 12 7 Male 109707
Prof Theorectical 30 26 Male 136660
Prof Theorectical 27 19 Male 103275
Prof Theorectical 28 26 Male 103649
AsstProf Theorectical 4 1 Male 74856
AsstProf Theorectical 6 3 Male 77081
Prof Theorectical 38 38 Male 150680
AssocProf Theorectical 11 8 Male 104121
AsstProf Theorectical 8 3 Male 75996
Prof Theorectical 27 23 Male 172505
AssocProf Theorectical 8 5 Male 86895
Prof Theorectical 44 44 Male 105000
Prof Theorectical 27 21 Male 125192
Prof Theorectical 15 9 Male 114330
Prof Theorectical 29 27 Male 139219
Prof Theorectical 29 15 Male 109305
Prof Theorectical 38 36 Male 119450
Prof Theorectical 33 18 Male 186023
Prof Theorectical 40 19 Male 166605
Prof Theorectical 30 19 Male 151292
Prof Theorectical 33 30 Male 103106
Prof Theorectical 31 19 Male 150564
Prof Theorectical 42 25 Male 101738
Prof Theorectical 25 15 Male 95329
AsstProf Theorectical 8 4 Male 81035
  1. Stepwise main effects model Fit the full model g.main: salary ~ .
g.main<-lm(salary ~ ., data=Salaries)

Obtain the coefficient estimates,standard error of estimates, t value,p value

#Coefficient estimates
coef(g.main)
##       (Intercept)     rankAssocProf          rankProf disciplineApplied 
##        65955.2324        12907.5879        45065.9987        14417.6256 
##     yrs.since.phd       yrs.service           sexMale 
##          535.0583         -489.5157         4783.4928
#standard error of estimates
estSigma <- summary(g.main)$sigma
estSigma
## [1] 22538.65
#or 
sqrt(deviance(g.main)/df.residual(g.main))
## [1] 22538.65
# or
summary(g.main)$sigma
## [1] 22538.65
# or
k=length(g.main$coefficients)-1
SSE=sum(g.main$residuals**2)
n=length(g.main$residuals)
sqrt(SSE/(n-(1+k)))
## [1] 22538.65
#t value
summary(g.main)
## 
## Call:
## lm(formula = salary ~ ., data = Salaries)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -65248 -13211  -1775  10384  99592 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        65955.2     4588.6  14.374  < 2e-16 ***
## rankAssocProf      12907.6     4145.3   3.114  0.00198 ** 
## rankProf           45066.0     4237.5  10.635  < 2e-16 ***
## disciplineApplied  14417.6     2342.9   6.154 1.88e-09 ***
## yrs.since.phd        535.1      241.0   2.220  0.02698 *  
## yrs.service         -489.5      211.9  -2.310  0.02143 *  
## sexMale             4783.5     3858.7   1.240  0.21584    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22540 on 390 degrees of freedom
## Multiple R-squared:  0.4547, Adjusted R-squared:  0.4463 
## F-statistic:  54.2 on 6 and 390 DF,  p-value: < 2.2e-16
lwr <- qt(.025, g.main$df.residual)
upr <- qt(.975, g.main$df.residual)
c(lwr, upr)
## [1] -1.966065  1.966065

Obtain the summary Estimated coefficients, Std. Error, t value, p-value.

summary(g.main)
## 
## Call:
## lm(formula = salary ~ ., data = Salaries)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -65248 -13211  -1775  10384  99592 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        65955.2     4588.6  14.374  < 2e-16 ***
## rankAssocProf      12907.6     4145.3   3.114  0.00198 ** 
## rankProf           45066.0     4237.5  10.635  < 2e-16 ***
## disciplineApplied  14417.6     2342.9   6.154 1.88e-09 ***
## yrs.since.phd        535.1      241.0   2.220  0.02698 *  
## yrs.service         -489.5      211.9  -2.310  0.02143 *  
## sexMale             4783.5     3858.7   1.240  0.21584    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22540 on 390 degrees of freedom
## Multiple R-squared:  0.4547, Adjusted R-squared:  0.4463 
## F-statistic:  54.2 on 6 and 390 DF,  p-value: < 2.2e-16

Run AIC stepwise regression on g.main, save the results into g.step

g.step <- update(g.main, . ~ . - sex)
g.step<- step(g.main)
## Start:  AIC=7965.19
## salary ~ rank + discipline + yrs.since.phd + yrs.service + sex
## 
##                 Df  Sum of Sq        RSS    AIC
## - sex            1 7.8068e+08 1.9890e+11 7964.8
## <none>                        1.9812e+11 7965.2
## - yrs.since.phd  1 2.5041e+09 2.0062e+11 7968.2
## - yrs.service    1 2.7100e+09 2.0083e+11 7968.6
## - discipline     1 1.9237e+10 2.1735e+11 8000.0
## - rank           2 6.9508e+10 2.6762e+11 8080.6
## 
## Step:  AIC=7964.75
## salary ~ rank + discipline + yrs.since.phd + yrs.service
## 
##                 Df  Sum of Sq        RSS    AIC
## <none>                        1.9890e+11 7964.8
## - yrs.since.phd  1 2.5001e+09 2.0140e+11 7967.7
## - yrs.service    1 2.5763e+09 2.0147e+11 7967.9
## - discipline     1 1.9489e+10 2.1839e+11 7999.9
## - rank           2 7.0679e+10 2.6958e+11 8081.5

Run a comparison of the coefficients of the two models, g.main and g.step.

comp<-compareCoefs(g.main,g.step, se = FALSE)
## Calls:
## 1: lm(formula = salary ~ ., data = Salaries)
## 2: lm(formula = salary ~ rank + discipline + yrs.since.phd + yrs.service, 
##   data = Salaries)
## 
##                   Model 1 Model 2
## (Intercept)         65955   69869
## rankAssocProf       12908   12832
## rankProf            45066   45288
## disciplineApplied   14418   14505
## yrs.since.phd         535     535
## yrs.service          -490    -477
## sexMale              4784
colnames(comp) <- c("g.main", "g.step")
comp
##                       g.main     g.step
## (Intercept)       65955.2324 69869.0110
## rankAssocProf     12907.5879 12831.5375
## rankProf          45065.9987 45287.6890
## disciplineApplied 14417.6256 14505.1514
## yrs.since.phd       535.0583   534.6313
## yrs.service        -489.5157  -476.7179
## sexMale            4783.4928         NA

Which predictor variable(s) are excluded from the final model?

#The predictor variable "sex" is excluded from the final model because it was greater than the p value.
  1. Scatterplot Matrix Obtain the scatterplot matrix with these specifications: the variables in this order: c(“sex”, “rank”, “discipline”, “yrs.since.phd”, “yrs.service”, “salary”) mapping = aes(color = discipline) lower = list(continuous = wrap(“smooth”, method = “loess”, se = FALSE, alpha = 0.3, size=0.1)
library(GGally)
## Loading required package: ggplot2
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(ggplot2)
ggpairs(Salaries, columns=c("sex",  "rank",  "discipline",  "yrs.since.phd",  "yrs.service",  "salary"),
        mapping = aes(colour = discipline), 
       lower = list(continuous = wrap("smooth", 
                                      method = "loess", se = FALSE, 
                                      alpha = 0.3, size=0.1)
                    )
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

4. Quadratic Model There appears to be an approximate quadratic or cubic relation of salary vs years.since.phd and salary vs years.service. Fit the quadratic model g.quad: salary ~ discipline + yrs.since.phd + I(yrs.since.phd^2)

g.quad <-lm(salary ~ discipline + yrs.since.phd + I(yrs.since.phd^2), data = Salaries)

Obtain the summary Estimated coefficients, Std. Error, t value, p-value.

summary(g.quad)
## 
## Call:
## lm(formula = salary ~ discipline + yrs.since.phd + I(yrs.since.phd^2), 
##     data = Salaries)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -60433 -15784  -1580  12032  94567 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        54056.237   4170.882  12.960  < 2e-16 ***
## disciplineApplied  15413.552   2487.443   6.197 1.46e-09 ***
## yrs.since.phd       4171.914    348.992  11.954  < 2e-16 ***
## I(yrs.since.phd^2)   -63.037      6.926  -9.102  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 24090 on 393 degrees of freedom
## Multiple R-squared:  0.3724, Adjusted R-squared:  0.3676 
## F-statistic: 77.73 on 3 and 393 DF,  p-value: < 2.2e-16

Run AIC stepwise on g.quad and save the results in g.quad.step

g.quad.step<-update(g.quad, .~. -discipline)
g.quad.step<-step(g.quad)
## Start:  AIC=8014.99
## salary ~ discipline + yrs.since.phd + I(yrs.since.phd^2)
## 
##                      Df  Sum of Sq        RSS    AIC
## <none>                             2.2801e+11 8015.0
## - discipline          1 2.2278e+10 2.5029e+11 8050.0
## - I(yrs.since.phd^2)  1 4.8067e+10 2.7608e+11 8088.9
## - yrs.since.phd       1 8.2910e+10 3.1092e+11 8136.1

Compare the coefficients of g.quad and g.quad.step.

compar<-compareCoefs(g.quad,g.quad.step, se = FALSE)
## Calls:
## 1: lm(formula = salary ~ discipline + yrs.since.phd + I(yrs.since.phd^2), 
##   data = Salaries)
## 2: lm(formula = salary ~ discipline + yrs.since.phd + I(yrs.since.phd^2), 
##   data = Salaries)
## 
##                    Model 1 Model 2
## (Intercept)          54056   54056
## disciplineApplied    15414   15414
## yrs.since.phd         4172    4172
## I(yrs.since.phd^2)     -63     -63
colnames(compar) <- c("g.quad", "g.quad.step")
compar
##                         g.quad g.quad.step
## (Intercept)        54056.23664 54056.23664
## disciplineApplied  15413.55206 15413.55206
## yrs.since.phd       4171.91362  4171.91362
## I(yrs.since.phd^2)   -63.03656   -63.03656

What are the differences between these two models”

# Both models "g.quad" and "g.quad.step"are identical, as the p value is not bigger than 2.2e-16.
  1. Quadratic model using poly() Look up the command poly()
?poly()

Fit a 2nd degree polynomial: g.poly2: salary ~ sex + rank + discipline + poly(yrs.since.phd, 2) + poly(yrs.service, 2)

g.poly2<- lm(salary ~ sex + rank + discipline + poly(yrs.since.phd, 2) + poly(yrs.service, 2), data=Salaries)

Obtain the summary Estimated coefficients, Std. Error, t value, p-value.

summary(g.poly2)
## 
## Call:
## lm(formula = salary ~ sex + rank + discipline + poly(yrs.since.phd, 
##     2) + poly(yrs.service, 2), data = Salaries)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -62474 -13343  -1545  10337  97997 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                76437       5855  13.055  < 2e-16 ***
## sexMale                     5488       3854   1.424  0.15525    
## rankAssocProf               6334       5042   1.256  0.20983    
## rankProf                   34854       6184   5.636 3.35e-08 ***
## disciplineApplied          14605       2335   6.256 1.05e-09 ***
## poly(yrs.since.phd, 2)1   170016      63114   2.694  0.00737 ** 
## poly(yrs.since.phd, 2)2   -95135      45858  -2.075  0.03869 *  
## poly(yrs.service, 2)1    -103756      55560  -1.867  0.06259 .  
## poly(yrs.service, 2)2      24061      38415   0.626  0.53146    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22440 on 388 degrees of freedom
## Multiple R-squared:  0.4622, Adjusted R-squared:  0.4512 
## F-statistic: 41.69 on 8 and 388 DF,  p-value: < 2.2e-16

Run AIC stepwise on g.poly2 and save the results in g.poly2.step

g.Poly2.step<-step(g.poly2)
## Start:  AIC=7963.64
## salary ~ sex + rank + discipline + poly(yrs.since.phd, 2) + poly(yrs.service, 
##     2)
## 
##                          Df  Sum of Sq        RSS    AIC
## - poly(yrs.service, 2)    2 1.9511e+09 1.9732e+11 7963.6
## <none>                                 1.9537e+11 7963.6
## - sex                     1 1.0210e+09 1.9639e+11 7963.7
## - poly(yrs.since.phd, 2)  2 4.9169e+09 2.0028e+11 7969.5
## - discipline              1 1.9705e+10 2.1507e+11 7999.8
## - rank                    2 2.8604e+10 2.2397e+11 8013.9
## 
## Step:  AIC=7963.59
## salary ~ sex + rank + discipline + poly(yrs.since.phd, 2)
## 
##                          Df  Sum of Sq        RSS    AIC
## - sex                     1 9.2964e+08 1.9825e+11 7963.5
## <none>                                 1.9732e+11 7963.6
## - poly(yrs.since.phd, 2)  2 3.6266e+09 2.0094e+11 7966.8
## - discipline              1 1.8774e+10 2.1609e+11 7997.7
## - rank                    2 2.8413e+10 2.2573e+11 8013.0
## 
## Step:  AIC=7963.45
## salary ~ rank + discipline + poly(yrs.since.phd, 2)
## 
##                          Df  Sum of Sq        RSS    AIC
## <none>                                 1.9825e+11 7963.5
## - poly(yrs.since.phd, 2)  2 3.3910e+09 2.0164e+11 7966.2
## - discipline              1 1.9050e+10 2.1730e+11 7997.9
## - rank                    2 2.9766e+10 2.2801e+11 8015.0

Compare coefficients of all models: g.poly2 and g.poly2.step

compar<-compareCoefs(g.poly2,g.Poly2.step, se = FALSE)
## Calls:
## 1: lm(formula = salary ~ sex + rank + discipline + poly(yrs.since.phd, 2) + 
##   poly(yrs.service, 2), data = Salaries)
## 2: lm(formula = salary ~ rank + discipline + poly(yrs.since.phd, 2), data = 
##   Salaries)
## 
##                         Model 1 Model 2
## (Intercept)               76437   81643
## sexMale                    5488        
## rankAssocProf              6334    5801
## rankProf                  34854   34849
## disciplineApplied         14605   14297
## poly(yrs.since.phd, 2)1  170016   77194
## poly(yrs.since.phd, 2)2  -95135  -83192
## poly(yrs.service, 2)1   -103756        
## poly(yrs.service, 2)2     24061
colnames(compar) <- c("g.poly2", "g.poly2.step")
compar
##                             g.poly2 g.poly2.step
## (Intercept)               76437.136    81642.915
## sexMale                    5487.975           NA
## rankAssocProf              6333.541     5800.885
## rankProf                  34854.482    34848.875
## disciplineApplied         14604.570    14297.080
## poly(yrs.since.phd, 2)1  170016.018    77193.528
## poly(yrs.since.phd, 2)2  -95135.382   -83192.377
## poly(yrs.service, 2)1   -103755.540           NA
## poly(yrs.service, 2)2     24060.905           NA
  1. Interaction-poly2 Model Fit the interaction model g.poly2.inter: salary ~ (sex + rank + discipline) * (poly(yrs.since.phd, 2) + poly(yrs.service, 2))

Note: A long way to write the model g.poly2.inter: salary ~ sex * (poly(yrs.since.phd, 2) + discipline:poly(yrs.service, 2)) + rank * (poly(yrs.since.phd, 2) + discipline:poly(yrs.service, 2)) + discipline*(poly(yrs.since.phd, 2) + discipline:poly(yrs.service, 2))

g.poly2.inter<-lm (salary ~ sex * (poly(yrs.since.phd, 2) + discipline:poly(yrs.service, 2)) +  rank * (poly(yrs.since.phd, 2) + discipline:poly(yrs.service, 2)) + discipline*(poly(yrs.since.phd, 2) + discipline:poly(yrs.service, 2)), data=Salaries)

Run AIC stepwise on g.poly2.inter and save the results in g.poly2.inter.step.

g.Poly2.inter.step<-step(g.poly2.inter)
## Start:  AIC=7957.5
## salary ~ sex * (poly(yrs.since.phd, 2) + discipline:poly(yrs.service, 
##     2)) + rank * (poly(yrs.since.phd, 2) + discipline:poly(yrs.service, 
##     2)) + discipline * (poly(yrs.since.phd, 2) + discipline:poly(yrs.service, 
##     2))
## 
##                                        Df  Sum of Sq        RSS    AIC
## - discipline:poly(yrs.service, 2):rank  8 4240145705 1.7643e+11 7951.2
## - poly(yrs.since.phd, 2):rank           4  900189533 1.7309e+11 7951.6
## - sex:discipline:poly(yrs.service, 2)   4 1875104964 1.7406e+11 7953.8
## - sex:poly(yrs.since.phd, 2)            2  194214508 1.7238e+11 7953.9
## <none>                                               1.7219e+11 7957.5
## - poly(yrs.since.phd, 2):discipline     2 8652330012 1.8084e+11 7973.0
## 
## Step:  AIC=7951.15
## salary ~ sex + poly(yrs.since.phd, 2) + rank + discipline + discipline:poly(yrs.service, 
##     2) + sex:poly(yrs.since.phd, 2) + poly(yrs.since.phd, 2):rank + 
##     poly(yrs.since.phd, 2):discipline + sex:discipline:poly(yrs.service, 
##     2)
## 
##                                       Df  Sum of Sq        RSS    AIC
## - sex:poly(yrs.since.phd, 2)           2  448637666 1.7687e+11 7948.2
## - sex:discipline:poly(yrs.service, 2)  4 2680727703 1.7911e+11 7949.1
## - poly(yrs.since.phd, 2):rank          4 3373473590 1.7980e+11 7950.7
## <none>                                              1.7643e+11 7951.2
## - poly(yrs.since.phd, 2):discipline    2 6884421396 1.8331e+11 7962.4
## 
## Step:  AIC=7948.16
## salary ~ sex + poly(yrs.since.phd, 2) + rank + discipline + discipline:poly(yrs.service, 
##     2) + poly(yrs.since.phd, 2):rank + poly(yrs.since.phd, 2):discipline + 
##     sex:discipline:poly(yrs.service, 2)
## 
##                                       Df  Sum of Sq        RSS    AIC
## - sex:discipline:poly(yrs.service, 2)  4 2879778235 1.7975e+11 7946.6
## <none>                                              1.7687e+11 7948.2
## - poly(yrs.since.phd, 2):rank          4 3860094978 1.8073e+11 7948.7
## - poly(yrs.since.phd, 2):discipline    2 6841108834 1.8372e+11 7959.2
## 
## Step:  AIC=7946.57
## salary ~ sex + poly(yrs.since.phd, 2) + rank + discipline + discipline:poly(yrs.service, 
##     2) + poly(yrs.since.phd, 2):rank + poly(yrs.since.phd, 2):discipline
## 
##                                     Df  Sum of Sq        RSS    AIC
## - sex                                1 7.6305e+08 1.8052e+11 7946.3
## <none>                                            1.7975e+11 7946.6
## - poly(yrs.since.phd, 2):rank        4 4.3436e+09 1.8410e+11 7948.1
## - poly(yrs.since.phd, 2):discipline  2 6.5479e+09 1.8630e+11 7956.8
## - discipline:poly(yrs.service, 2)    4 1.2076e+10 1.9183e+11 7964.4
## 
## Step:  AIC=7946.26
## salary ~ poly(yrs.since.phd, 2) + rank + discipline + discipline:poly(yrs.service, 
##     2) + poly(yrs.since.phd, 2):rank + poly(yrs.since.phd, 2):discipline
## 
##                                     Df  Sum of Sq        RSS    AIC
## <none>                                            1.8052e+11 7946.3
## - poly(yrs.since.phd, 2):rank        4 4.5822e+09 1.8510e+11 7948.2
## - poly(yrs.since.phd, 2):discipline  2 6.3978e+09 1.8691e+11 7956.1
## - discipline:poly(yrs.service, 2)    4 1.1841e+10 1.9236e+11 7963.5

Obtain the summary Estimated coefficients, Std. Error, t value, p-value.

summary(g.Poly2.inter.step)
## 
## Call:
## lm(formula = salary ~ poly(yrs.since.phd, 2) + rank + discipline + 
##     discipline:poly(yrs.service, 2) + poly(yrs.since.phd, 2):rank + 
##     poly(yrs.since.phd, 2):discipline, data = Salaries)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -70939 -12112   -571  10464  95072 
## 
## Coefficients:
##                                              Estimate Std. Error t value
## (Intercept)                                     -2859     160204  -0.018
## poly(yrs.since.phd, 2)1                      -1265575    3489792  -0.363
## poly(yrs.since.phd, 2)2                       -556476    1283169  -0.434
## rankAssocProf                                   85573     160168   0.534
## rankProf                                       115466     160145   0.721
## disciplineApplied                               14978       2274   6.587
## disciplineTheorectical:poly(yrs.service, 2)1  -341839      78788  -4.339
## disciplineApplied:poly(yrs.service, 2)1        107675      74442   1.446
## disciplineTheorectical:poly(yrs.service, 2)2    -4501      70593  -0.064
## disciplineApplied:poly(yrs.service, 2)2         92382      46375   1.992
## poly(yrs.since.phd, 2)1:rankAssocProf         1499868    3492985   0.429
## poly(yrs.since.phd, 2)2:rankAssocProf          552919    1279580   0.432
## poly(yrs.since.phd, 2)1:rankProf              1754635    3492834   0.502
## poly(yrs.since.phd, 2)2:rankProf               408006    1277997   0.319
## poly(yrs.since.phd, 2)1:disciplineApplied     -399311     110104  -3.627
## poly(yrs.since.phd, 2)2:disciplineApplied      -50234      83689  -0.600
##                                              Pr(>|t|)    
## (Intercept)                                  0.985773    
## poly(yrs.since.phd, 2)1                      0.717067    
## poly(yrs.since.phd, 2)2                      0.664771    
## rankAssocProf                                0.593464    
## rankProf                                     0.471347    
## disciplineApplied                            1.49e-10 ***
## disciplineTheorectical:poly(yrs.service, 2)1 1.84e-05 ***
## disciplineApplied:poly(yrs.service, 2)1      0.148881    
## disciplineTheorectical:poly(yrs.service, 2)2 0.949192    
## disciplineApplied:poly(yrs.service, 2)2      0.047077 *  
## poly(yrs.since.phd, 2)1:rankAssocProf        0.667879    
## poly(yrs.since.phd, 2)2:rankAssocProf        0.665906    
## poly(yrs.since.phd, 2)1:rankProf             0.615710    
## poly(yrs.since.phd, 2)2:rankProf             0.749708    
## poly(yrs.since.phd, 2)1:disciplineApplied    0.000326 ***
## poly(yrs.since.phd, 2)2:disciplineApplied    0.548704    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21770 on 381 degrees of freedom
## Multiple R-squared:  0.5031, Adjusted R-squared:  0.4836 
## F-statistic: 25.72 on 15 and 381 DF,  p-value: < 2.2e-16

Compare coefficients of all models: g.poly2.inter and g.poly2.inter.step

compar<-compareCoefs(g.poly2.inter,g.Poly2.inter.step, se = FALSE)
## Calls:
## 1: lm(formula = salary ~ sex * (poly(yrs.since.phd, 2) + 
##   discipline:poly(yrs.service, 2)) + rank * (poly(yrs.since.phd, 2) + 
##   discipline:poly(yrs.service, 2)) + discipline * (poly(yrs.since.phd, 2) + 
##   discipline:poly(yrs.service, 2)), data = Salaries)
## 2: lm(formula = salary ~ poly(yrs.since.phd, 2) + rank + discipline + 
##   discipline:poly(yrs.service, 2) + poly(yrs.since.phd, 2):rank + 
##   poly(yrs.since.phd, 2):discipline, data = Salaries)
## 
##                                                             Model 1  Model 2
## (Intercept)                                                  337384    -2859
## sexMale                                                        2748         
## poly(yrs.since.phd, 2)1                                     -350158 -1265575
## poly(yrs.since.phd, 2)2                                     -266472  -556476
## rankAssocProf                                               -257726    85573
## rankProf                                                    -230242   115466
## disciplineApplied                                             20826    14978
## disciplineTheorectical:poly(yrs.service, 2)1                7444115  -341839
## disciplineApplied:poly(yrs.service, 2)1                     7796716   107675
## disciplineTheorectical:poly(yrs.service, 2)2                3219508    -4501
## disciplineApplied:poly(yrs.service, 2)2                     2571296    92382
## sexMale:poly(yrs.since.phd, 2)1                               24112         
## sexMale:poly(yrs.since.phd, 2)2                              -92208         
## poly(yrs.since.phd, 2)1:rankAssocProf                        913660  1499868
## poly(yrs.since.phd, 2)2:rankAssocProf                        254251   552919
## poly(yrs.since.phd, 2)1:rankProf                             901565  1754635
## poly(yrs.since.phd, 2)2:rankProf                             137266   408006
## poly(yrs.since.phd, 2)1:disciplineApplied                   -546282  -399311
## poly(yrs.since.phd, 2)2:disciplineApplied                     94353   -50234
## sexMale:disciplineTheorectical:poly(yrs.service, 2)1        -302661         
## sexMale:disciplineApplied:poly(yrs.service, 2)1              -10497         
## sexMale:disciplineTheorectical:poly(yrs.service, 2)2        -275831         
## sexMale:disciplineApplied:poly(yrs.service, 2)2              141608         
## disciplineTheorectical:poly(yrs.service, 2)1:rankAssocProf -7792962         
## disciplineApplied:poly(yrs.service, 2)1:rankAssocProf      -7836275         
## disciplineTheorectical:poly(yrs.service, 2)2:rankAssocProf -2782313         
## disciplineApplied:poly(yrs.service, 2)2:rankAssocProf      -2728586         
## disciplineTheorectical:poly(yrs.service, 2)1:rankProf      -7488892         
## disciplineApplied:poly(yrs.service, 2)1:rankProf           -7694882         
## disciplineTheorectical:poly(yrs.service, 2)2:rankProf      -2951332         
## disciplineApplied:poly(yrs.service, 2)2:rankProf           -2600910
colnames(compar) <- c("g.poly2.inter", "g.poly2.inter.step")
compar
##                                                            g.poly2.inter
## (Intercept)                                                   337384.391
## sexMale                                                         2748.007
## poly(yrs.since.phd, 2)1                                      -350158.144
## poly(yrs.since.phd, 2)2                                      -266471.717
## rankAssocProf                                                -257726.124
## rankProf                                                     -230242.291
## disciplineApplied                                              20826.377
## disciplineTheorectical:poly(yrs.service, 2)1                 7444115.259
## disciplineApplied:poly(yrs.service, 2)1                      7796716.456
## disciplineTheorectical:poly(yrs.service, 2)2                 3219508.356
## disciplineApplied:poly(yrs.service, 2)2                      2571296.507
## sexMale:poly(yrs.since.phd, 2)1                                24112.542
## sexMale:poly(yrs.since.phd, 2)2                               -92208.289
## poly(yrs.since.phd, 2)1:rankAssocProf                         913659.554
## poly(yrs.since.phd, 2)2:rankAssocProf                         254250.730
## poly(yrs.since.phd, 2)1:rankProf                              901565.175
## poly(yrs.since.phd, 2)2:rankProf                              137266.227
## poly(yrs.since.phd, 2)1:disciplineApplied                    -546281.714
## poly(yrs.since.phd, 2)2:disciplineApplied                      94352.612
## sexMale:disciplineTheorectical:poly(yrs.service, 2)1         -302661.318
## sexMale:disciplineApplied:poly(yrs.service, 2)1               -10497.316
## sexMale:disciplineTheorectical:poly(yrs.service, 2)2         -275830.595
## sexMale:disciplineApplied:poly(yrs.service, 2)2               141608.508
## disciplineTheorectical:poly(yrs.service, 2)1:rankAssocProf  -7792962.406
## disciplineApplied:poly(yrs.service, 2)1:rankAssocProf       -7836274.661
## disciplineTheorectical:poly(yrs.service, 2)2:rankAssocProf  -2782313.152
## disciplineApplied:poly(yrs.service, 2)2:rankAssocProf       -2728586.258
## disciplineTheorectical:poly(yrs.service, 2)1:rankProf       -7488892.523
## disciplineApplied:poly(yrs.service, 2)1:rankProf            -7694881.896
## disciplineTheorectical:poly(yrs.service, 2)2:rankProf       -2951331.741
## disciplineApplied:poly(yrs.service, 2)2:rankProf            -2600909.470
##                                                            g.poly2.inter.step
## (Intercept)                                                         -2858.586
## sexMale                                                                    NA
## poly(yrs.since.phd, 2)1                                          -1265574.904
## poly(yrs.since.phd, 2)2                                           -556475.682
## rankAssocProf                                                       85573.330
## rankProf                                                           115465.616
## disciplineApplied                                                   14977.945
## disciplineTheorectical:poly(yrs.service, 2)1                      -341838.695
## disciplineApplied:poly(yrs.service, 2)1                            107674.985
## disciplineTheorectical:poly(yrs.service, 2)2                        -4501.257
## disciplineApplied:poly(yrs.service, 2)2                             92381.735
## sexMale:poly(yrs.since.phd, 2)1                                            NA
## sexMale:poly(yrs.since.phd, 2)2                                            NA
## poly(yrs.since.phd, 2)1:rankAssocProf                             1499867.742
## poly(yrs.since.phd, 2)2:rankAssocProf                              552918.834
## poly(yrs.since.phd, 2)1:rankProf                                  1754635.272
## poly(yrs.since.phd, 2)2:rankProf                                   408006.311
## poly(yrs.since.phd, 2)1:disciplineApplied                         -399311.445
## poly(yrs.since.phd, 2)2:disciplineApplied                          -50233.580
## sexMale:disciplineTheorectical:poly(yrs.service, 2)1                       NA
## sexMale:disciplineApplied:poly(yrs.service, 2)1                            NA
## sexMale:disciplineTheorectical:poly(yrs.service, 2)2                       NA
## sexMale:disciplineApplied:poly(yrs.service, 2)2                            NA
## disciplineTheorectical:poly(yrs.service, 2)1:rankAssocProf                 NA
## disciplineApplied:poly(yrs.service, 2)1:rankAssocProf                      NA
## disciplineTheorectical:poly(yrs.service, 2)2:rankAssocProf                 NA
## disciplineApplied:poly(yrs.service, 2)2:rankAssocProf                      NA
## disciplineTheorectical:poly(yrs.service, 2)1:rankProf                      NA
## disciplineApplied:poly(yrs.service, 2)1:rankProf                           NA
## disciplineTheorectical:poly(yrs.service, 2)2:rankProf                      NA
## disciplineApplied:poly(yrs.service, 2)2:rankProf                           NA
  1. Cubic model using poly() Fit a 3rd degree polynomial: g.poly3: salary ~ sex + rank + discipline + poly(yrs.since.phd, 3) + poly(yrs.service,3)
g.poly3<-lm(salary ~ sex + rank + discipline + poly(yrs.since.phd, 3) + poly(yrs.service,3), data=Salaries)

Obtain the summary Estimated coefficients, Std. Error, t value, p-value.

summary(g.poly3)
## 
## Call:
## lm(formula = salary ~ sex + rank + discipline + poly(yrs.since.phd, 
##     3) + poly(yrs.service, 3), data = Salaries)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -61499 -12453  -1333  10222  97399 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                72520       6489  11.175  < 2e-16 ***
## sexMale                     5122       3818   1.341  0.18059    
## rankAssocProf              11227       6113   1.836  0.06706 .  
## rankProf                   40324       7256   5.558 5.11e-08 ***
## disciplineApplied          14225       2310   6.157 1.86e-09 ***
## poly(yrs.since.phd, 3)1   134960      64139   2.104  0.03601 *  
## poly(yrs.since.phd, 3)2   -84857      45457  -1.867  0.06269 .  
## poly(yrs.since.phd, 3)3  -105181      32479  -3.238  0.00131 ** 
## poly(yrs.service, 3)1     -93992      55078  -1.707  0.08872 .  
## poly(yrs.service, 3)2      49871      40308   1.237  0.21675    
## poly(yrs.service, 3)3      78499      29675   2.645  0.00850 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22160 on 386 degrees of freedom
## Multiple R-squared:  0.4784, Adjusted R-squared:  0.4649 
## F-statistic:  35.4 on 10 and 386 DF,  p-value: < 2.2e-16

Run AIC stepwise on g.poly3 and save the results in g.poly2.step

g.poly3.step<-step(g.poly3)
## Start:  AIC=7955.55
## salary ~ sex + rank + discipline + poly(yrs.since.phd, 3) + poly(yrs.service, 
##     3)
## 
##                          Df  Sum of Sq        RSS    AIC
## - sex                     1 8.8334e+08 1.9039e+11 7955.4
## <none>                                 1.8951e+11 7955.6
## - poly(yrs.service, 3)    3 6.1473e+09 1.9566e+11 7962.2
## - poly(yrs.since.phd, 3)  3 1.0402e+10 1.9991e+11 7970.8
## - discipline              1 1.8610e+10 2.0812e+11 7990.7
## - rank                    2 2.9557e+10 2.1906e+11 8009.1
## 
## Step:  AIC=7955.4
## salary ~ rank + discipline + poly(yrs.since.phd, 3) + poly(yrs.service, 
##     3)
## 
##                          Df  Sum of Sq        RSS    AIC
## <none>                                 1.9039e+11 7955.4
## - poly(yrs.service, 3)    3 6.0107e+09 1.9640e+11 7961.7
## - poly(yrs.since.phd, 3)  3 1.0409e+10 2.0080e+11 7970.5
## - discipline              1 1.8870e+10 2.0926e+11 7990.9
## - rank                    2 3.1177e+10 2.2157e+11 8011.6

Compare coefficients of all models: g.poly3 and g.poly3.step

compar<-compareCoefs(g.poly3,g.poly3.step, se = FALSE)
## Calls:
## 1: lm(formula = salary ~ sex + rank + discipline + poly(yrs.since.phd, 3) + 
##   poly(yrs.service, 3), data = Salaries)
## 2: lm(formula = salary ~ rank + discipline + poly(yrs.since.phd, 3) + 
##   poly(yrs.service, 3), data = Salaries)
## 
##                         Model 1 Model 2
## (Intercept)               72520   76227
## sexMale                    5122        
## rankAssocProf             11227   11840
## rankProf                  40324   41462
## disciplineApplied         14225   14318
## poly(yrs.since.phd, 3)1  134960  131838
## poly(yrs.since.phd, 3)2  -84857  -80680
## poly(yrs.since.phd, 3)3 -105181 -107486
## poly(yrs.service, 3)1    -93992  -91868
## poly(yrs.service, 3)2     49871   50992
## poly(yrs.service, 3)3     78499   77369
colnames(compar) <- c("g.poly3", "g.poly3.step")
compar
##                             g.poly3 g.poly3.step
## (Intercept)               72520.216     76226.77
## sexMale                    5121.782           NA
## rankAssocProf             11226.867     11840.12
## rankProf                  40324.075     41462.55
## disciplineApplied         14225.078     14317.73
## poly(yrs.since.phd, 3)1  134959.712    131838.27
## poly(yrs.since.phd, 3)2  -84857.343    -80680.09
## poly(yrs.since.phd, 3)3 -105180.793   -107485.90
## poly(yrs.service, 3)1    -93991.634    -91867.57
## poly(yrs.service, 3)2     49870.710     50991.53
## poly(yrs.service, 3)3     78499.233     77368.75

What are the differences between these two models”

#g.poly3.step does not have the sex column.
  1. Interaction-poly3 Model Fit the interaction model g.poly3.inter: salary ~ (sex + rank + discipline) * (poly(yrs.since.phd, 3) + poly(yrs.service, 3))

Note: A long way to write the model g.poly3.inter: salary ~ sex * (poly(yrs.since.phd, 3) + discipline:poly(yrs.service, 3)) + rank * (poly(yrs.since.phd, 3) + discipline:poly(yrs.service, 3)) + discipline*(poly(yrs.since.phd, 3) + discipline:poly(yrs.service, 3))

g.poly3.inter<-lm(salary ~(sex + rank + discipline) *  (poly(yrs.since.phd, 3) + poly(yrs.service, 3)), data=Salaries)

Obtain the summary Estimated coefficients, Std. Error, t value, p-value.

summary(g.poly3.inter)
## 
## Call:
## lm(formula = salary ~ (sex + rank + discipline) * (poly(yrs.since.phd, 
##     3) + poly(yrs.service, 3)), data = Salaries)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -73222 -11955   -558   8847  97011 
## 
## Coefficients:
##                                             Estimate Std. Error t value
## (Intercept)                               -6.683e+06  8.401e+06  -0.795
## sexMale                                    6.538e+02  8.055e+03   0.081
## rankAssocProf                              6.766e+06  8.402e+06   0.805
## rankProf                                   6.794e+06  8.402e+06   0.809
## disciplineApplied                          1.473e+04  2.304e+03   6.392
## poly(yrs.since.phd, 3)1                   -6.720e+06  4.074e+07  -0.165
## poly(yrs.since.phd, 3)2                   -4.145e+06  2.432e+07  -0.170
## poly(yrs.since.phd, 3)3                   -1.214e+06  6.009e+06  -0.202
## poly(yrs.service, 3)1                     -2.063e+08  2.549e+08  -0.809
## poly(yrs.service, 3)2                     -1.343e+08  1.646e+08  -0.816
## poly(yrs.service, 3)3                     -3.231e+07  3.923e+07  -0.824
## sexMale:poly(yrs.since.phd, 3)1            1.326e+04  3.446e+05   0.038
## sexMale:poly(yrs.since.phd, 3)2           -1.674e+05  3.496e+05  -0.479
## sexMale:poly(yrs.since.phd, 3)3           -1.433e+05  2.337e+05  -0.613
## sexMale:poly(yrs.service, 3)1             -2.434e+05  4.708e+05  -0.517
## sexMale:poly(yrs.service, 3)2             -6.969e+04  4.956e+05  -0.141
## sexMale:poly(yrs.service, 3)3              7.424e+04  2.841e+05   0.261
## rankAssocProf:poly(yrs.since.phd, 3)1      6.964e+06  4.073e+07   0.171
## rankProf:poly(yrs.since.phd, 3)1           7.174e+06  4.074e+07   0.176
## rankAssocProf:poly(yrs.since.phd, 3)2      4.286e+06  2.432e+07   0.176
## rankProf:poly(yrs.since.phd, 3)2           4.108e+06  2.432e+07   0.169
## rankAssocProf:poly(yrs.since.phd, 3)3      1.351e+06  6.003e+06   0.225
## rankProf:poly(yrs.since.phd, 3)3           1.195e+06  6.002e+06   0.199
## rankAssocProf:poly(yrs.service, 3)1        2.062e+08  2.549e+08   0.809
## rankProf:poly(yrs.service, 3)1             2.063e+08  2.549e+08   0.809
## rankAssocProf:poly(yrs.service, 3)2        1.344e+08  1.646e+08   0.817
## rankProf:poly(yrs.service, 3)2             1.344e+08  1.646e+08   0.817
## rankAssocProf:poly(yrs.service, 3)3        3.216e+07  3.926e+07   0.819
## rankProf:poly(yrs.service, 3)3             3.236e+07  3.926e+07   0.824
## disciplineApplied:poly(yrs.since.phd, 3)1 -3.291e+05  1.185e+05  -2.776
## disciplineApplied:poly(yrs.since.phd, 3)2  2.251e+04  8.885e+04   0.253
## disciplineApplied:poly(yrs.since.phd, 3)3  2.641e+05  7.259e+04   3.638
## disciplineApplied:poly(yrs.service, 3)1    3.863e+05  1.159e+05   3.332
## disciplineApplied:poly(yrs.service, 3)2   -2.009e+04  8.957e+04  -0.224
## disciplineApplied:poly(yrs.service, 3)3   -6.873e+04  7.195e+04  -0.955
##                                           Pr(>|t|)    
## (Intercept)                               0.426844    
## sexMale                                   0.935351    
## rankAssocProf                             0.421187    
## rankProf                                  0.419276    
## disciplineApplied                         5.04e-10 ***
## poly(yrs.since.phd, 3)1                   0.869070    
## poly(yrs.since.phd, 3)2                   0.864776    
## poly(yrs.since.phd, 3)3                   0.840060    
## poly(yrs.service, 3)1                     0.418859    
## poly(yrs.service, 3)2                     0.415077    
## poly(yrs.service, 3)3                     0.410741    
## sexMale:poly(yrs.since.phd, 3)1           0.969320    
## sexMale:poly(yrs.since.phd, 3)2           0.632425    
## sexMale:poly(yrs.since.phd, 3)3           0.540284    
## sexMale:poly(yrs.service, 3)1             0.605439    
## sexMale:poly(yrs.service, 3)2             0.888243    
## sexMale:poly(yrs.service, 3)3             0.794006    
## rankAssocProf:poly(yrs.since.phd, 3)1     0.864336    
## rankProf:poly(yrs.since.phd, 3)1          0.860302    
## rankAssocProf:poly(yrs.since.phd, 3)2     0.860233    
## rankProf:poly(yrs.since.phd, 3)2          0.865974    
## rankAssocProf:poly(yrs.since.phd, 3)3     0.822056    
## rankProf:poly(yrs.since.phd, 3)3          0.842295    
## rankAssocProf:poly(yrs.service, 3)1       0.419125    
## rankProf:poly(yrs.service, 3)1            0.418981    
## rankAssocProf:poly(yrs.service, 3)2       0.414700    
## rankProf:poly(yrs.service, 3)2            0.414626    
## rankAssocProf:poly(yrs.service, 3)3       0.413250    
## rankProf:poly(yrs.service, 3)3            0.410376    
## disciplineApplied:poly(yrs.since.phd, 3)1 0.005790 ** 
## disciplineApplied:poly(yrs.since.phd, 3)2 0.800116    
## disciplineApplied:poly(yrs.since.phd, 3)3 0.000314 ***
## disciplineApplied:poly(yrs.service, 3)1   0.000950 ***
## disciplineApplied:poly(yrs.service, 3)2   0.822634    
## disciplineApplied:poly(yrs.service, 3)3   0.340065    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21450 on 362 degrees of freedom
## Multiple R-squared:  0.5416, Adjusted R-squared:  0.4986 
## F-statistic: 12.58 on 34 and 362 DF,  p-value: < 2.2e-16

Run AIC stepwise on g.poly3.inter and save the results in g.poly3.inter.step.

g.poly3.inter.step<-step(g.poly3.inter)
## Start:  AIC=7952.23
## salary ~ (sex + rank + discipline) * (poly(yrs.since.phd, 3) + 
##     poly(yrs.service, 3))
## 
##                                     Df  Sum of Sq        RSS    AIC
## - rank:poly(yrs.since.phd, 3)        6 2.2812e+09 1.6881e+11 7945.6
## - rank:poly(yrs.service, 3)          6 2.8013e+09 1.6933e+11 7946.9
## - sex:poly(yrs.since.phd, 3)         3 5.1277e+08 1.6704e+11 7947.5
## - sex:poly(yrs.service, 3)           3 6.5744e+08 1.6718e+11 7947.8
## <none>                                            1.6653e+11 7952.2
## - discipline:poly(yrs.service, 3)    3 6.6772e+09 1.7320e+11 7961.8
## - discipline:poly(yrs.since.phd, 3)  3 1.2524e+10 1.7905e+11 7975.0
## 
## Step:  AIC=7945.63
## salary ~ sex + rank + discipline + poly(yrs.since.phd, 3) + poly(yrs.service, 
##     3) + sex:poly(yrs.since.phd, 3) + sex:poly(yrs.service, 3) + 
##     rank:poly(yrs.service, 3) + discipline:poly(yrs.since.phd, 
##     3) + discipline:poly(yrs.service, 3)
## 
##                                     Df  Sum of Sq        RSS    AIC
## - sex:poly(yrs.since.phd, 3)         3 5.1531e+08 1.6932e+11 7940.8
## - sex:poly(yrs.service, 3)           3 5.8039e+08 1.6939e+11 7941.0
## - rank:poly(yrs.service, 3)          6 3.6923e+09 1.7250e+11 7942.2
## <none>                                            1.6881e+11 7945.6
## - discipline:poly(yrs.service, 3)    3 7.3290e+09 1.7614e+11 7956.5
## - discipline:poly(yrs.since.phd, 3)  3 1.3015e+10 1.8182e+11 7969.1
## 
## Step:  AIC=7940.84
## salary ~ sex + rank + discipline + poly(yrs.since.phd, 3) + poly(yrs.service, 
##     3) + sex:poly(yrs.service, 3) + rank:poly(yrs.service, 3) + 
##     discipline:poly(yrs.since.phd, 3) + discipline:poly(yrs.service, 
##     3)
## 
##                                     Df  Sum of Sq        RSS    AIC
## - sex:poly(yrs.service, 3)           3 7.4505e+08 1.7007e+11 7936.6
## - rank:poly(yrs.service, 3)          6 3.6453e+09 1.7297e+11 7937.3
## <none>                                            1.6932e+11 7940.8
## - discipline:poly(yrs.service, 3)    3 7.0848e+09 1.7641e+11 7951.1
## - discipline:poly(yrs.since.phd, 3)  3 1.2894e+10 1.8222e+11 7964.0
## 
## Step:  AIC=7936.58
## salary ~ sex + rank + discipline + poly(yrs.since.phd, 3) + poly(yrs.service, 
##     3) + rank:poly(yrs.service, 3) + discipline:poly(yrs.since.phd, 
##     3) + discipline:poly(yrs.service, 3)
## 
##                                     Df  Sum of Sq        RSS    AIC
## - rank:poly(yrs.service, 3)          6 3.6146e+09 1.7368e+11 7932.9
## <none>                                            1.7007e+11 7936.6
## - sex                                1 1.3716e+09 1.7144e+11 7937.8
## - discipline:poly(yrs.service, 3)    3 6.8367e+09 1.7690e+11 7946.2
## - discipline:poly(yrs.since.phd, 3)  3 1.2673e+10 1.8274e+11 7959.1
## 
## Step:  AIC=7932.93
## salary ~ sex + rank + discipline + poly(yrs.since.phd, 3) + poly(yrs.service, 
##     3) + discipline:poly(yrs.since.phd, 3) + discipline:poly(yrs.service, 
##     3)
## 
##                                     Df  Sum of Sq        RSS    AIC
## <none>                                            1.7368e+11 7932.9
## - sex                                1 1.6091e+09 1.7529e+11 7934.6
## - discipline:poly(yrs.service, 3)    3 6.6327e+09 1.8032e+11 7941.8
## - discipline:poly(yrs.since.phd, 3)  3 1.1958e+10 1.8564e+11 7953.4
## - rank                               2 2.8967e+10 2.0265e+11 7990.2

Obtain the summary Estimated coefficients, Std. Error, t value, p-value.

summary(g.poly3.inter.step)
## 
## Call:
## lm(formula = salary ~ sex + rank + discipline + poly(yrs.since.phd, 
##     3) + poly(yrs.service, 3) + discipline:poly(yrs.since.phd, 
##     3) + discipline:poly(yrs.service, 3), data = Salaries)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -72276 -13777  -1088   9904  96619 
## 
## Coefficients:
##                                           Estimate Std. Error t value Pr(>|t|)
## (Intercept)                                  71274       6303  11.308  < 2e-16
## sexMale                                       6980       3720   1.876 0.061381
## rankAssocProf                                10064       5985   1.681 0.093501
## rankProf                                     39239       7111   5.518 6.35e-08
## disciplineApplied                            14507       2242   6.472 2.99e-10
## poly(yrs.since.phd, 3)1                     309877      94428   3.282 0.001128
## poly(yrs.since.phd, 3)2                     -87920      74035  -1.188 0.235752
## poly(yrs.since.phd, 3)3                    -216222      58070  -3.723 0.000226
## poly(yrs.service, 3)1                      -280526      84860  -3.306 0.001037
## poly(yrs.service, 3)2                        87812      73685   1.192 0.234115
## poly(yrs.service, 3)3                       101227      57032   1.775 0.076714
## disciplineApplied:poly(yrs.since.phd, 3)1  -333815     113100  -2.951 0.003359
## disciplineApplied:poly(yrs.since.phd, 3)2    41435      84678   0.489 0.624895
## disciplineApplied:poly(yrs.since.phd, 3)3   240994      68081   3.540 0.000450
## disciplineApplied:poly(yrs.service, 3)1     379210     112077   3.383 0.000790
## disciplineApplied:poly(yrs.service, 3)2     -47180      87062  -0.542 0.588197
## disciplineApplied:poly(yrs.service, 3)3     -59755      67544  -0.885 0.376890
##                                              
## (Intercept)                               ***
## sexMale                                   .  
## rankAssocProf                             .  
## rankProf                                  ***
## disciplineApplied                         ***
## poly(yrs.since.phd, 3)1                   ** 
## poly(yrs.since.phd, 3)2                      
## poly(yrs.since.phd, 3)3                   ***
## poly(yrs.service, 3)1                     ** 
## poly(yrs.service, 3)2                        
## poly(yrs.service, 3)3                     .  
## disciplineApplied:poly(yrs.since.phd, 3)1 ** 
## disciplineApplied:poly(yrs.since.phd, 3)2    
## disciplineApplied:poly(yrs.since.phd, 3)3 ***
## disciplineApplied:poly(yrs.service, 3)1   ***
## disciplineApplied:poly(yrs.service, 3)2      
## disciplineApplied:poly(yrs.service, 3)3      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21380 on 380 degrees of freedom
## Multiple R-squared:  0.5219, Adjusted R-squared:  0.5018 
## F-statistic: 25.93 on 16 and 380 DF,  p-value: < 2.2e-16

What is the difference in salary of men versus women?

#men are 60% in salary than women
  1. Compare coefficents Compare coefficients of all stepwise models constructed so far.
compar<-compareCoefs(g.step,g.quad.step,g.Poly2.step,g.Poly2.inter.step,g.poly3.step, g.poly3.inter.step, se = FALSE)
## Calls:
## 1: lm(formula = salary ~ rank + discipline + yrs.since.phd + yrs.service, 
##   data = Salaries)
## 2: lm(formula = salary ~ discipline + yrs.since.phd + I(yrs.since.phd^2), 
##   data = Salaries)
## 3: lm(formula = salary ~ rank + discipline + poly(yrs.since.phd, 2), data = 
##   Salaries)
## 4: lm(formula = salary ~ poly(yrs.since.phd, 2) + rank + discipline + 
##   discipline:poly(yrs.service, 2) + poly(yrs.since.phd, 2):rank + 
##   poly(yrs.since.phd, 2):discipline, data = Salaries)
## 5: lm(formula = salary ~ rank + discipline + poly(yrs.since.phd, 3) + 
##   poly(yrs.service, 3), data = Salaries)
## 6: lm(formula = salary ~ sex + rank + discipline + poly(yrs.since.phd, 3) + 
##   poly(yrs.service, 3) + discipline:poly(yrs.since.phd, 3) + 
##   discipline:poly(yrs.service, 3), data = Salaries)
## 
##                                              Model 1 Model 2  Model 3  Model 4
## (Intercept)                                    69869   54056    81643    -2859
## rankAssocProf                                  12832             5801    85573
## rankProf                                       45288            34849   115466
## disciplineApplied                              14505   15414    14297    14978
## yrs.since.phd                                    535    4172                  
## yrs.service                                     -477                          
## I(yrs.since.phd^2)                                       -63                  
## poly(yrs.since.phd, 2)1                                         77194 -1265575
## poly(yrs.since.phd, 2)2                                        -83192  -556476
## disciplineTheorectical:poly(yrs.service, 2)1                           -341839
## disciplineApplied:poly(yrs.service, 2)1                                 107675
## disciplineTheorectical:poly(yrs.service, 2)2                             -4501
## disciplineApplied:poly(yrs.service, 2)2                                  92382
## poly(yrs.since.phd, 2)1:rankAssocProf                                  1499868
## poly(yrs.since.phd, 2)2:rankAssocProf                                   552919
## poly(yrs.since.phd, 2)1:rankProf                                       1754635
## poly(yrs.since.phd, 2)2:rankProf                                        408006
## poly(yrs.since.phd, 2)1:disciplineApplied                              -399311
## poly(yrs.since.phd, 2)2:disciplineApplied                               -50234
## poly(yrs.since.phd, 3)1                                                       
## poly(yrs.since.phd, 3)2                                                       
## poly(yrs.since.phd, 3)3                                                       
## poly(yrs.service, 3)1                                                         
## poly(yrs.service, 3)2                                                         
## poly(yrs.service, 3)3                                                         
## sexMale                                                                       
## disciplineApplied:poly(yrs.since.phd, 3)1                                     
## disciplineApplied:poly(yrs.since.phd, 3)2                                     
## disciplineApplied:poly(yrs.since.phd, 3)3                                     
## disciplineApplied:poly(yrs.service, 3)1                                       
## disciplineApplied:poly(yrs.service, 3)2                                       
## disciplineApplied:poly(yrs.service, 3)3                                       
##                                              Model 5 Model 6
## (Intercept)                                    76227   71274
## rankAssocProf                                  11840   10064
## rankProf                                       41462   39239
## disciplineApplied                              14318   14507
## yrs.since.phd                                               
## yrs.service                                                 
## I(yrs.since.phd^2)                                          
## poly(yrs.since.phd, 2)1                                     
## poly(yrs.since.phd, 2)2                                     
## disciplineTheorectical:poly(yrs.service, 2)1                
## disciplineApplied:poly(yrs.service, 2)1                     
## disciplineTheorectical:poly(yrs.service, 2)2                
## disciplineApplied:poly(yrs.service, 2)2                     
## poly(yrs.since.phd, 2)1:rankAssocProf                       
## poly(yrs.since.phd, 2)2:rankAssocProf                       
## poly(yrs.since.phd, 2)1:rankProf                            
## poly(yrs.since.phd, 2)2:rankProf                            
## poly(yrs.since.phd, 2)1:disciplineApplied                   
## poly(yrs.since.phd, 2)2:disciplineApplied                   
## poly(yrs.since.phd, 3)1                       131838  309877
## poly(yrs.since.phd, 3)2                       -80680  -87920
## poly(yrs.since.phd, 3)3                      -107486 -216222
## poly(yrs.service, 3)1                         -91868 -280526
## poly(yrs.service, 3)2                          50992   87812
## poly(yrs.service, 3)3                          77369  101227
## sexMale                                                 6980
## disciplineApplied:poly(yrs.since.phd, 3)1            -333815
## disciplineApplied:poly(yrs.since.phd, 3)2              41435
## disciplineApplied:poly(yrs.since.phd, 3)3             240994
## disciplineApplied:poly(yrs.service, 3)1               379210
## disciplineApplied:poly(yrs.service, 3)2               -47180
## disciplineApplied:poly(yrs.service, 3)3               -59755
colnames(compar) <- c("g.step", "g.quad.step", "g.Poly2.step",  "g.Poly2.inter.step",  "g.poly3.step",  "g.poly3.inter.step")
compar
##                                                  g.step g.quad.step
## (Intercept)                                  69869.0110 54056.23664
## rankAssocProf                                12831.5375          NA
## rankProf                                     45287.6890          NA
## disciplineApplied                            14505.1514 15413.55206
## yrs.since.phd                                  534.6313  4171.91362
## yrs.service                                   -476.7179          NA
## I(yrs.since.phd^2)                                   NA   -63.03656
## poly(yrs.since.phd, 2)1                              NA          NA
## poly(yrs.since.phd, 2)2                              NA          NA
## disciplineTheorectical:poly(yrs.service, 2)1         NA          NA
## disciplineApplied:poly(yrs.service, 2)1              NA          NA
## disciplineTheorectical:poly(yrs.service, 2)2         NA          NA
## disciplineApplied:poly(yrs.service, 2)2              NA          NA
## poly(yrs.since.phd, 2)1:rankAssocProf                NA          NA
## poly(yrs.since.phd, 2)2:rankAssocProf                NA          NA
## poly(yrs.since.phd, 2)1:rankProf                     NA          NA
## poly(yrs.since.phd, 2)2:rankProf                     NA          NA
## poly(yrs.since.phd, 2)1:disciplineApplied            NA          NA
## poly(yrs.since.phd, 2)2:disciplineApplied            NA          NA
## poly(yrs.since.phd, 3)1                              NA          NA
## poly(yrs.since.phd, 3)2                              NA          NA
## poly(yrs.since.phd, 3)3                              NA          NA
## poly(yrs.service, 3)1                                NA          NA
## poly(yrs.service, 3)2                                NA          NA
## poly(yrs.service, 3)3                                NA          NA
## sexMale                                              NA          NA
## disciplineApplied:poly(yrs.since.phd, 3)1            NA          NA
## disciplineApplied:poly(yrs.since.phd, 3)2            NA          NA
## disciplineApplied:poly(yrs.since.phd, 3)3            NA          NA
## disciplineApplied:poly(yrs.service, 3)1              NA          NA
## disciplineApplied:poly(yrs.service, 3)2              NA          NA
## disciplineApplied:poly(yrs.service, 3)3              NA          NA
##                                              g.Poly2.step g.Poly2.inter.step
## (Intercept)                                     81642.915          -2858.586
## rankAssocProf                                    5800.885          85573.330
## rankProf                                        34848.875         115465.616
## disciplineApplied                               14297.080          14977.945
## yrs.since.phd                                          NA                 NA
## yrs.service                                            NA                 NA
## I(yrs.since.phd^2)                                     NA                 NA
## poly(yrs.since.phd, 2)1                         77193.528       -1265574.904
## poly(yrs.since.phd, 2)2                        -83192.377        -556475.682
## disciplineTheorectical:poly(yrs.service, 2)1           NA        -341838.695
## disciplineApplied:poly(yrs.service, 2)1                NA         107674.985
## disciplineTheorectical:poly(yrs.service, 2)2           NA          -4501.257
## disciplineApplied:poly(yrs.service, 2)2                NA          92381.735
## poly(yrs.since.phd, 2)1:rankAssocProf                  NA        1499867.742
## poly(yrs.since.phd, 2)2:rankAssocProf                  NA         552918.834
## poly(yrs.since.phd, 2)1:rankProf                       NA        1754635.272
## poly(yrs.since.phd, 2)2:rankProf                       NA         408006.311
## poly(yrs.since.phd, 2)1:disciplineApplied              NA        -399311.445
## poly(yrs.since.phd, 2)2:disciplineApplied              NA         -50233.580
## poly(yrs.since.phd, 3)1                                NA                 NA
## poly(yrs.since.phd, 3)2                                NA                 NA
## poly(yrs.since.phd, 3)3                                NA                 NA
## poly(yrs.service, 3)1                                  NA                 NA
## poly(yrs.service, 3)2                                  NA                 NA
## poly(yrs.service, 3)3                                  NA                 NA
## sexMale                                                NA                 NA
## disciplineApplied:poly(yrs.since.phd, 3)1              NA                 NA
## disciplineApplied:poly(yrs.since.phd, 3)2              NA                 NA
## disciplineApplied:poly(yrs.since.phd, 3)3              NA                 NA
## disciplineApplied:poly(yrs.service, 3)1                NA                 NA
## disciplineApplied:poly(yrs.service, 3)2                NA                 NA
## disciplineApplied:poly(yrs.service, 3)3                NA                 NA
##                                              g.poly3.step g.poly3.inter.step
## (Intercept)                                      76226.77          71274.302
## rankAssocProf                                    11840.12          10063.563
## rankProf                                         41462.55          39238.774
## disciplineApplied                                14317.73          14506.866
## yrs.since.phd                                          NA                 NA
## yrs.service                                            NA                 NA
## I(yrs.since.phd^2)                                     NA                 NA
## poly(yrs.since.phd, 2)1                                NA                 NA
## poly(yrs.since.phd, 2)2                                NA                 NA
## disciplineTheorectical:poly(yrs.service, 2)1           NA                 NA
## disciplineApplied:poly(yrs.service, 2)1                NA                 NA
## disciplineTheorectical:poly(yrs.service, 2)2           NA                 NA
## disciplineApplied:poly(yrs.service, 2)2                NA                 NA
## poly(yrs.since.phd, 2)1:rankAssocProf                  NA                 NA
## poly(yrs.since.phd, 2)2:rankAssocProf                  NA                 NA
## poly(yrs.since.phd, 2)1:rankProf                       NA                 NA
## poly(yrs.since.phd, 2)2:rankProf                       NA                 NA
## poly(yrs.since.phd, 2)1:disciplineApplied              NA                 NA
## poly(yrs.since.phd, 2)2:disciplineApplied              NA                 NA
## poly(yrs.since.phd, 3)1                         131838.27         309877.278
## poly(yrs.since.phd, 3)2                         -80680.09         -87919.788
## poly(yrs.since.phd, 3)3                        -107485.90        -216222.540
## poly(yrs.service, 3)1                           -91867.57        -280525.823
## poly(yrs.service, 3)2                            50991.53          87811.952
## poly(yrs.service, 3)3                            77368.75         101226.797
## sexMale                                                NA           6980.412
## disciplineApplied:poly(yrs.since.phd, 3)1              NA        -333814.783
## disciplineApplied:poly(yrs.since.phd, 3)2              NA          41435.171
## disciplineApplied:poly(yrs.since.phd, 3)3              NA         240994.149
## disciplineApplied:poly(yrs.service, 3)1                NA         379210.413
## disciplineApplied:poly(yrs.service, 3)2                NA         -47179.698
## disciplineApplied:poly(yrs.service, 3)3                NA         -59754.637
  1. Perform a cross-validation of all the stepwise models Using folds equal to 3, perform a cross-validation of all the stepwise models: g.poly2.step, g.poly2.inter.step, g.poly3.step, g.poly3.inter.step. Repeat the CV 10 times using a for loop. Save the mse values in a separate dataframe.
library(DAAG)
## Loading required package: lattice
## 
## Attaching package: 'DAAG'
## The following object is masked from 'package:car':
## 
##     vif
newseed <- round(runif(1, min=0, max=100))
num.fold <- 3 
oldpar <- par(mfrow = c(1, 2))
df <- data.frame(matrix(ncol = 4, nrow = 0)) 
colnames(df) <- c('mse.g.Poly2.step', 'mse.g.Poly2.inter.step', 'mse.g.poly3.step', 'mse.g.poly3.inter.step')
for (i in 1:10) {
mse.g.Poly2.step<- CVlm(data = Salaries, 
                    form.lm = g.Poly2.step, 
                    m = num.fold,
                    seed=newseed,
                    main = "Prediction Plot: g.Poly2.step",
                    printit = FALSE)
mse.g.Poly2.inter.step<-CVlm(data = Salaries, 
                    form.lm = g.Poly2.inter.step, 
                    m = num.fold,
                    seed=newseed,
                    main = "Prediction Plot: g.Poly2.inter.step",
                    printit = FALSE)
  
mse.g.poly3.step<-CVlm(data = Salaries, 
                    form.lm = g.poly3.step, 
                    m = num.fold,
                    seed=newseed,
                    main = "Prediction Plot: g.poly3.step",
                    printit = FALSE)
mse.g.poly3.inter.step<-CVlm(data = Salaries, 
                    form.lm = g.poly3.inter.step, 
                    m = num.fold,
                    seed=newseed,
                    main = "Prediction Plot: g.poly3.inter.step",
                    printit = FALSE)
  par(oldpar)
 
df[nrow(df) + 1,] = data.frame(mse.g.Poly2.step=attr(mse.g.Poly2.step, "ms"),
             mse.g.Poly2.inter.step=attr(mse.g.Poly2.inter.step, "ms"),
             mse.g.poly3.step=attr(mse.g.poly3.step, "ms"),
             mse.g.poly3.inter.step=attr(mse.g.poly3.inter.step, "ms"))
}
## Warning in CVlm(data = Salaries, form.lm = g.Poly2.step, m = num.fold, seed = newseed, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate
## Warning in CVlm(data = Salaries, form.lm = g.Poly2.inter.step, m = num.fold, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate
## Warning in CVlm(data = Salaries, form.lm = g.poly3.step, m = num.fold, seed = newseed, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.poly3.inter.step, m = num.fold, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate
## Warning in CVlm(data = Salaries, form.lm = g.Poly2.step, m = num.fold, seed = newseed, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.Poly2.inter.step, m = num.fold, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.poly3.step, m = num.fold, seed = newseed, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.poly3.inter.step, m = num.fold, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.Poly2.step, m = num.fold, seed = newseed, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.Poly2.inter.step, m = num.fold, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.poly3.step, m = num.fold, seed = newseed, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.poly3.inter.step, m = num.fold, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.Poly2.step, m = num.fold, seed = newseed, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.Poly2.inter.step, m = num.fold, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.poly3.step, m = num.fold, seed = newseed, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.poly3.inter.step, m = num.fold, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.Poly2.step, m = num.fold, seed = newseed, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.Poly2.inter.step, m = num.fold, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.poly3.step, m = num.fold, seed = newseed, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.poly3.inter.step, m = num.fold, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.Poly2.step, m = num.fold, seed = newseed, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.Poly2.inter.step, m = num.fold, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.poly3.step, m = num.fold, seed = newseed, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.poly3.inter.step, m = num.fold, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.Poly2.step, m = num.fold, seed = newseed, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.Poly2.inter.step, m = num.fold, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.poly3.step, m = num.fold, seed = newseed, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.poly3.inter.step, m = num.fold, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.Poly2.step, m = num.fold, seed = newseed, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.Poly2.inter.step, m = num.fold, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.poly3.step, m = num.fold, seed = newseed, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.poly3.inter.step, m = num.fold, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.Poly2.step, m = num.fold, seed = newseed, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.Poly2.inter.step, m = num.fold, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.poly3.step, m = num.fold, seed = newseed, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.poly3.inter.step, m = num.fold, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.Poly2.step, m = num.fold, seed = newseed, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.Poly2.inter.step, m = num.fold, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.poly3.step, m = num.fold, seed = newseed, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## Warning in CVlm(data = Salaries, form.lm = g.poly3.inter.step, m = num.fold, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

df
##    mse.g.Poly2.step mse.g.Poly2.inter.step mse.g.poly3.step
## 1         537920804              577955925        570982659
## 2         537920804              577955925        570982659
## 3         537920804              577955925        570982659
## 4         537920804              577955925        570982659
## 5         537920804              577955925        570982659
## 6         537920804              577955925        570982659
## 7         537920804              577955925        570982659
## 8         537920804              577955925        570982659
## 9         537920804              577955925        570982659
## 10        537920804              577955925        570982659
##    mse.g.poly3.inter.step
## 1               563536787
## 2               563536787
## 3               563536787
## 4               563536787
## 5               563536787
## 6               563536787
## 7               563536787
## 8               563536787
## 9               563536787
## 10              563536787
  1. Compare all the above stepwise models using the mse’s Compare the all the above models using the mse’s from the cross-validations with number of folds equal to 3.
library(DAAG)
newseed <- round(runif(1, min=0, max=100))
num.fold <- 3 
oldpar <- par(mfrow = c(1, 2))
 mse.g.Poly2.step <- CVlm(data = Salaries, 
                    form.lm = g.Poly2.step, 
                    m = num.fold,
                    seed=newseed,
                    main = "Prediction Plot: g.Poly2.step",
                    printit = FALSE)
## Warning in CVlm(data = Salaries, form.lm = g.Poly2.step, m = num.fold, seed = newseed, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate
  mse.g.Poly2.inter.step <- CVlm(data = Salaries, 
                    form.lm = g.Poly2.inter.step, 
                    m = num.fold,
                    seed=newseed,
                    main = "Prediction Plot: g.Poly2.inter.step",
                    printit = FALSE)
## Warning in CVlm(data = Salaries, form.lm = g.Poly2.inter.step, m = num.fold, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

  mse.g.poly3.step <- CVlm(data = Salaries, 
                    form.lm = g.poly3.step, 
                    m = num.fold,
                    seed=newseed,
                    main = "Prediction Plot: g.poly3.step",
                    printit = FALSE)
## Warning in CVlm(data = Salaries, form.lm = g.poly3.step, m = num.fold, seed = newseed, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate
  mse.g.poly3.inter.step <- CVlm(data = Salaries, 
                    form.lm = g.poly3.inter.step, 
                    m = num.fold,
                    seed=newseed,
                    main = "Prediction Plot: g.poly3.inter.step",
                    printit = FALSE)
## Warning in CVlm(data = Salaries, form.lm = g.poly3.inter.step, m = num.fold, : 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

par(oldpar)
   data.frame(mse.g.Poly2.step=attr(mse.g.Poly2.step, "ms"),
             mse.g.Poly2.inter.step=attr(mse.g.Poly2.inter.step, "ms"),
             mse.g.poly3.step=attr(mse.g.poly3.step, "ms"),
             mse.g.poly3.inter.step=attr(mse.g.poly3.inter.step, "ms"))
##   mse.g.Poly2.step mse.g.Poly2.inter.step mse.g.poly3.step
## 1        514525960              495657834        518951446
##   mse.g.poly3.inter.step
## 1              497658817

12 Which models pass hypothesis testing?

g.poly2.step vs g.poly2.inter.step

beta0= g.Poly2.step
beta1=g.Poly2.inter.step
#H0= beta0=0, beta1=0
#H1= either beta0 !=0, beta1!=0
library(rsq)
rsq.partial(g.Poly2.step, g.Poly2.inter.step)
## $adjustment
## [1] FALSE
## 
## $variables.full
## [1] "rank"                   "discipline"             "poly(yrs.since.phd, 2)"
## 
## $variables.reduced
## [1] "poly(yrs.since.phd, 2)"            "rank"                             
## [3] "discipline"                        "discipline:poly(yrs.service, 2)"  
## [5] "poly(yrs.since.phd, 2):rank"       "poly(yrs.since.phd, 2):discipline"
## 
## $partial.rsq
## [1] -0.09822421

Perform the ANOVA test of models g.poly2.step vs g.poly2.inter.step

data.frame(tab7 <- anova(g.Poly2.step,g.Poly2.inter.step))
##   Res.Df          RSS Df   Sum.of.Sq        F       Pr..F.
## 1    391 198247920917 NA          NA       NA           NA
## 2    381 180516800326 10 17731120591 3.742343 8.192489e-05

What is the p-value?

p_value<-8.192489e-05

Using alpha <- 0.01, perform the hypothesis test based on the p-value, what is the conclusion of the test?

alpha=0.01
if(p_value<alpha)"g.Poly2.inter.step"else"g.Poly2.step"
## [1] "g.Poly2.inter.step"

g.poly3.step vs g.poly3.inter.step

beta0= g.poly3.step
beta1=g.poly3.inter.step
#H0= beta0=0, beta1=0
#H1= either beta0 !=0, beta1!=0
library(rsq)
rsq.partial(g.poly3.step, g.poly3.inter.step)
## $adjustment
## [1] FALSE
## 
## $variables.full
## [1] "rank"                   "discipline"             "poly(yrs.since.phd, 3)"
## [4] "poly(yrs.service, 3)"  
## 
## $variables.reduced
## [1] "sex"                               "rank"                             
## [3] "discipline"                        "poly(yrs.since.phd, 3)"           
## [5] "poly(yrs.service, 3)"              "discipline:poly(yrs.since.phd, 3)"
## [7] "discipline:poly(yrs.service, 3)"  
## 
## $partial.rsq
## [1] -0.09620257

Perform the ANOVA test of models g.poly3.step vs g.poly3.inter.step

data.frame(tab7 <- anova(g.poly3.step,g.poly3.inter.step))
##   Res.Df          RSS Df   Sum.of.Sq        F       Pr..F.
## 1    387 190391054429 NA          NA       NA           NA
## 2    380 173682364018  7 16708690410 5.222425 1.056704e-05

What is the p-value?

p_value<-1.056704e-05

Using alpha <- 0.01, perform the hypothesis test based on the p-value, what is the conclusion of the test?

alpha=0.01
if(p_value<alpha)"g.poly3.inter.step"else"g.poly3.step"
## [1] "g.poly3.inter.step"